Add configurable GPU benchmarks
samuelburnham committed Oct 30, 2023
1 parent b7e603a commit 7f2747e
Showing 6 changed files with 156 additions and 68 deletions.
26 changes: 9 additions & 17 deletions .github/workflows/bench-deploy.yml
@@ -7,28 +7,18 @@ on:
jobs:
benchmark:
name: Bench and deploy
runs-on: gpu-bench-a6000
env:
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: compute,utility
EC_GPU_FRAMEWORK: cuda
runs-on: [self-hosted, gpu-bench-t4]
steps:
# Install deps
- uses: actions-rs/toolchain@v1
#- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@just
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: just@v1

# Set up GPU
# Check we have access to the machine's Nvidia drivers
- run: nvidia-smi
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
- name: Set env for CUDA compute
run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> $GITHUB_ENV
- name: Set env for EC_GPU
run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> $GITHUB_ENV
- run: echo "${{ env.EC_GPU_CUDA_NVCC_ARGS }}"
# Check that CUDA is installed with a driver-compatible version
# This must also be compatible with the GPU architecture, see above link
- run: nvcc --version
@@ -43,15 +33,17 @@ jobs:
- name: Install criterion
run: cargo install cargo-criterion
- name: Run benchmarks
run: just --dotenv-filename bench.env deploy-bench
run: just --dotenv-filename bench.env gpu-bench fibonacci
- name: Compress artifacts
run: tar -cvzf ${{ github.sha }}.tar.gz Cargo.lock ${{ github.sha }}.json
- name: Deploy latest benchmark report
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./target/criterion
destination_dir: benchmarks/criterion
- name: Move benchmark json to history
run: mkdir history; cp Cargo.lock ${{ github.sha }}.json history/
run: mkdir history; cp ${{ github.sha }}.tar.gz history/
- name: Deploy benchmark history
uses: peaceiris/actions-gh-pages@v3
with:
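For reference, a minimal standalone sketch of what the CUDA env-setup steps above compute, assuming a T4 that reports compute capability 7.5 (the value differs per GPU architecture):

    # Mirrors the workflow's env-setup; run on the GPU host itself.
    # nvidia-smi reports the compute capability, e.g. "7.5" on a T4.
    CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')
    echo "$CUDA_ARCH"    # -> 75
    # nvcc is then pointed at the matching sm_/compute_ architecture:
    echo "--fatbin --gpu-architecture=sm_${CUDA_ARCH} --generate-code=arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"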
30 changes: 26 additions & 4 deletions .github/workflows/bench-pr-comment.yml
@@ -39,30 +39,52 @@ jobs:
# Needed. The name of the branch to compare with
branchName: ${{ github.ref_name }}

# TODO: Check it works with forked PRs when running
# `gh pr checkout {{ github.event.issue.number}}` with `env: GH_TOKEN`
gpu-benchmark:
name: run fibonacci benchmark on GPU
runs-on: [self-hosted, gpu-bench]
runs-on: [self-hosted, gpu-bench-t4]
if:
github.event.issue.pull_request
&& github.event.issue.state == 'open'
&& contains(github.event.comment.body, '!benchmark')
&& (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER')
steps:
# Set up GPU
# Check we have access to the machine's Nvidia drivers
- run: nvidia-smi
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# Writes env vars to `bench.env` to be read by `just` command
- name: Set env for CUDA compute
run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> bench.env
- name: Set env for EC_GPU
run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> bench.env
# Check that CUDA is installed with a driver-compatible version
# This must also be compatible with the GPU architecture, see above link
- run: nvcc --version

- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch

- uses: actions/checkout@v4
if: success()
with:
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set the Rust env vars
- uses: actions-rs/toolchain@v1
- uses: Swatinem/rust-cache@v2
# Strict load => panic if .env file not found
- name: Load env vars
uses: xom9ikk/dotenv@v2
with:
path: bench.env
load-mode: strict

- uses: boa-dev/criterion-compare-action@v3
with:
# Optional. Compare only this benchmark target
benchName: "fibonacci"
# Optional. Features activated in the benchmark
features: "cuda,opencl"
features: "cuda"
# Needed. The name of the branch to compare with
branchName: ${{ github.ref_name }}
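As a sketch, the intent of the two "Set env" steps above is to append the CUDA settings to bench.env so the strict dotenv load can pick them up; on a T4 (compute capability 7.5, an assumed value here) the appended lines would look roughly like:

    CUDA_ARCH=75
    EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75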
80 changes: 55 additions & 25 deletions .github/workflows/merge-group.yml
@@ -67,38 +67,68 @@ jobs:
cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)'
# TODO: Make this a required status check
# TODO: Cache successful bench run from PR branch on master, keyed on commit hash
# Run comparative benchmark against master
gpu-benchmark:
# [TEMPORARY] Test one run before attempting merge
#if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
name: Run fibonacci bench on GPU
runs-on: gpu-bench-a6000
env:
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: compute,utility
EC_GPU_FRAMEWORK: cuda
runs-on: [self-hosted, gpu-bench-t4]
steps:
# Checkout and install deps
- uses: actions/checkout@v4
- uses: actions-rs/toolchain@v1
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@just

# TODO: Factor this out into an action or into justfile, it's used in 4 places
# Set up GPU
# Check we have access to the machine's Nvidia drivers
- run: nvidia-smi
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
- name: Set env for CUDA compute
run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> $GITHUB_ENV
- name: Set env for EC_GPU
run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> $GITHUB_ENV
- run: echo "${{ env.EC_GPU_CUDA_NVCC_ARGS }}"
# Check that CUDA is installed with a driver-compatible version
# This must also be compatible with the GPU architecture, see above link
- run: nvcc --version

# Run benchmark
- uses: actions/checkout@v4
# Checkout base branch for comparative bench
- uses: actions/checkout@v4
with:
ref: master
- run: ls -a
- name: Set base ref variable
run: echo "BASE_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
# Checkout the justfile and env of the source branch so the base can bench
- run: git restore --source ${{ github.sha }} justfile bench.env
- run: ls -a
# Install dependencies
- uses: actions-rs/toolchain@v1
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: just@1
# Run benchmark on base branch
- name: Install criterion
run: cargo install cargo-criterion
# TODO: Write this in a commit comment, reject if there's a regression
- name: Run benchmarks
run: just --dotenv-filename bench.env gpu-bench
run: |
cargo install cargo-criterion
cargo install criterion-table
- name: Run GPU bench
run: just --dotenv-filename bench.env gpu-bench fibonacci
# Switch to triggering branch and run benchmark
- run: rm justfile bench.env
- uses: actions/checkout@v4
with:
ref: ${{ github.sha }}
- name: Run GPU bench on source branch
run: just --dotenv-filename bench.env gpu-bench fibonacci
# Create a comparative `criterion-table` and write in commit comment
- name: Run `criterion-table`
run: cat ${{ env.BASE_REF }}.json ${{ github.sha }}.json | criterion-table > BENCHMARKS.md
- name: Write comparative bench on commit comment
uses: peter-evans/commit-comment@v3
with:
body-path: BENCHMARKS.md
# TODO: Use jq for JSON parsing if needed
# Check for benchmark regression based on Criterion's configured noise threshold
- name: Performance regression check
id: check-regression
run: |
echo "regress_count=$(grep -c 'Regressed' ${{ github.sha }}.json)" >> $GITHUB_OUTPUT
# Fail job if regression found
- uses: actions/github-script@v6
if: ${{ steps.check-regression.outputs.regress_count }} > 0
with:
script: |
core.setFailed('Fibonacci bench regression detected')
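The regression check above relies on cargo-criterion's JSON output containing the string "Regressed" when a benchmark slows down beyond the configured noise threshold. A rough local equivalent of the last two steps, with the file name and exit behaviour as illustrative stand-ins for the workflow's, would be:

    # Count regressions reported in the benchmark JSON, then fail if any were found.
    regress_count=$(grep -c 'Regressed' "$GITHUB_SHA.json")
    if [ "$regress_count" -gt 0 ]; then
        echo "Fibonacci bench regression detected" >&2
        exit 1
    fi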
11 changes: 9 additions & 2 deletions bench.env
@@ -1,2 +1,9 @@
LURK_PERF_CONFIG=max-parallel-simple
LURK_RC=100,600
# Lurk config
LURK_PERF=max-parallel-simple
LURK_RC=100,600
LURK_BENCH_NOISE_THRESHOLD=0.10

# CUDA config
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
EC_GPU_FRAMEWORK=cuda
35 changes: 28 additions & 7 deletions benches/fibonacci.rs
@@ -1,5 +1,6 @@
use std::{cell::RefCell, rc::Rc, sync::Arc, time::Duration};

use anyhow::anyhow;
use criterion::{
black_box, criterion_group, criterion_main, measurement, BatchSize, BenchmarkGroup,
BenchmarkId, Criterion, SamplingMode,
@@ -118,24 +119,44 @@ fn fibo_prove<M: measurement::Measurement>(
);
}

fn get_rc_env() -> Vec<usize> {
let rc_env = std::env::var("LURK_RC").unwrap_or("100".into());
rc_env
.split(',')
.filter_map(|rc| rc.parse::<usize>().ok())
.collect()
fn rc_env() -> anyhow::Result<Vec<usize>> {
std::env::var("LURK_RC")
.map_err(|e| anyhow!("Reduction count env var isn't set: {e}"))
.and_then(|rc| {
let vec: anyhow::Result<Vec<usize>> = rc
.split(',')
.map(|rc| {
rc.parse::<usize>()
.map_err(|e| anyhow!("Failed to parse RC: {e}"))
})
.collect();
vec
})
}

fn noise_threshold_env() -> anyhow::Result<f64> {
std::env::var("LURK_BENCH_NOISE_THRESHOLD")
.map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))
.and_then(|nt| {
nt.parse::<f64>()
.map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
})
}

fn fibonacci_prove(c: &mut Criterion) {
tracing_subscriber::fmt::init();
set_bench_config();
tracing::debug!("{:?}", lurk::config::LURK_CONFIG);
let reduction_counts = get_rc_env();

let reduction_counts = rc_env().unwrap_or_else(|_| vec![100]);
tracing::debug!("Fibonacci bench RCs: {:?}", &reduction_counts);
let batch_sizes = [100, 200];

let mut group: BenchmarkGroup<'_, _> = c.benchmark_group("Prove");
group.sampling_mode(SamplingMode::Flat); // This can take a *while*
group.sample_size(10);
group.noise_threshold(noise_threshold_env().unwrap_or(0.05));

let state = State::init_lurk_state().rccell();

for fib_n in batch_sizes.iter() {
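For context, a sketch of how these env vars reach the benchmark when it is run directly; the workflows set the same values through bench.env instead:

    LURK_RC=100,600 LURK_BENCH_NOISE_THRESHOLD=0.10 cargo criterion --bench fibonacci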
42 changes: 29 additions & 13 deletions justfile
@@ -1,19 +1,35 @@
# Install with `cargo install just`
# Usage: `just --dotenv-filename /path/to/file.env <bench|gpu-bench>`
# TODO: Move dotenv-filename into justfile once the feature is available
set dotenv-load

print-lurk-env:
echo $LURK_PERF_CONFIG
echo $LURK_RC
commit := `git rev-parse HEAD`

bench:
print-lurk-env
cargo criterion --bench fibonacci
# Run CPU benchmarks
bench +benches:
#!/bin/sh
printenv LURK
if [ '{{benches}}' != '' ]; then
for bench in {{benches}}; do
cargo criterion --bench $bench
done
else
echo "Invalid input, enter at least one non-empty string"
fi

gpu-bench:
print-lurk-env
cargo criterion --bench fibonacci

deploy-bench:
print-lurk-env
cargo criterion --bench fibonacci --message-format=json > ${{ github.sha }}.json
# Run CUDA benchmarks on GPU
gpu-bench +benches:
#!/bin/sh
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')
export EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH"
env | grep -E "LURK|EC_GPU|CUDA"
if [ '{{benches}}' != '' ]; then
for bench in {{benches}}; do
cargo criterion --bench $bench --features "cuda" --message-format=json 2>&1 > {{commit}}.json
done
else
echo "Invalid input, enter at least one non-empty string"
fi
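The +benches parameter accepts one or more space-separated bench names, so the recipes loop over every target given. Example invocations (fibonacci is the bench used by the workflows above; any other name would be hypothetical):

    just bench fibonacci
    just --dotenv-filename bench.env gpu-bench fibonacci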
