Add configurable GPU benchmarks
samuelburnham committed Oct 30, 2023
1 parent b7e603a commit 7f2747e
Showing 6 changed files with 156 additions and 68 deletions.
26 changes: 9 additions & 17 deletions .github/workflows/bench-deploy.yml
@@ -7,28 +7,18 @@ on:
jobs:
benchmark:
name: Bench and deploy
runs-on: gpu-bench-a6000
env:
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: compute,utility
EC_GPU_FRAMEWORK: cuda
runs-on: [self-hosted, gpu-bench-t4]
steps:
# Install deps
- uses: actions-rs/toolchain@v1
#- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@just
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: just@v1

# Set up GPU
# Check we have access to the machine's Nvidia drivers
- run: nvidia-smi
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
- name: Set env for CUDA compute
run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> $GITHUB_ENV
- name: Set env for EC_GPU
run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> $GITHUB_ENV
- run: echo "${{ env.EC_GPU_CUDA_NVCC_ARGS }}"
# Check that CUDA is installed with a driver-compatible version
# This must also be compatible with the GPU architecture, see above link
- run: nvcc --version
@@ -43,15 +33,17 @@ jobs:
- name: Install criterion
run: cargo install cargo-criterion
- name: Run benchmarks
run: just --dotenv-filename bench.env deploy-bench
run: just --dotenv-filename bench.env gpu-bench fibonacci
- name: Compress artifacts
run: tar -cvzf ${{ github.sha }}.tar.gz Cargo.lock ${{ github.sha }}.json
- name: Deploy latest benchmark report
uses: peaceiris/actions-gh-pages@v3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./target/criterion
destination_dir: benchmarks/criterion
- name: Move benchmark json to history
run: mkdir history; cp Cargo.lock ${{ github.sha }}.json history/
run: mkdir history; cp ${{ github.sha }}.tar.gz history/
- name: Deploy benchmark history
uses: peaceiris/actions-gh-pages@v3
with:
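For reference, a minimal standalone sketch of what the CUDA env-setup steps above compute, assuming a T4 that reports compute capability 7.5 (the value differs per GPU architecture):

    # Mirrors the workflow's env-setup; run on the GPU host itself.
    # nvidia-smi reports the compute capability, e.g. "7.5" on a T4.
    CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')
    echo "$CUDA_ARCH"    # -> 75
    # nvcc is then pointed at the matching sm_/compute_ architecture:
    echo "--fatbin --gpu-architecture=sm_${CUDA_ARCH} --generate-code=arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"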
30 changes: 26 additions & 4 deletions .github/workflows/bench-pr-comment.yml
@@ -39,30 +39,52 @@ jobs:
# Needed. The name of the branch to compare with
branchName: ${{ github.ref_name }}

# TODO: Check it works with forked PRs when running
# `gh pr checkout {{ github.event.issue.number}}` with `env: GH_TOKEN`
gpu-benchmark:
name: run fibonacci benchmark on GPU
runs-on: [self-hosted, gpu-bench]
runs-on: [self-hosted, gpu-bench-t4]
if:
github.event.issue.pull_request
&& github.event.issue.state == 'open'
&& contains(github.event.comment.body, '!benchmark')
&& (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER')
steps:
# Set up GPU
# Check we have access to the machine's Nvidia drivers
- run: nvidia-smi
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
# Writes env vars to `bench.env` to be read by `just` command
- name: Set env for CUDA compute
run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> bench.env
- name: Set env for EC_GPU
run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> bench.env
# Check that CUDA is installed with a driver-compatible version
# This must also be compatible with the GPU architecture, see above link
- run: nvcc --version

- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch

- uses: actions/checkout@v4
if: success()
with:
ref: ${{ steps.comment-branch.outputs.head_ref }}
# Set the Rust env vars
- uses: actions-rs/toolchain@v1
- uses: Swatinem/rust-cache@v2
# Strict load => panic if .env file not found
- name: Load env vars
uses: xom9ikk/dotenv@v2
with:
path: bench.env
load-mode: strict

- uses: boa-dev/criterion-compare-action@v3
with:
# Optional. Compare only this benchmark target
benchName: "fibonacci"
# Optional. Features activated in the benchmark
features: "cuda,opencl"
features: "cuda"
# Needed. The name of the branch to compare with
branchName: ${{ github.ref_name }}
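As a sketch, the intent of the two "Set env" steps above is to append the CUDA settings to bench.env so the strict dotenv load can pick them up; on a T4 (compute capability 7.5, an assumed value here) the appended lines would look roughly like:

    CUDA_ARCH=75
    EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75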
80 changes: 55 additions & 25 deletions .github/workflows/merge-group.yml
@@ -67,38 +67,68 @@ jobs:
cargo nextest run --profile ci --workspace --cargo-profile dev-no-assertions -E 'test(circuit::gadgets)'
# TODO: Make this a required status check
# TODO: Cache successful bench run from PR branch on master, keyed on commit hash
# Run comparative benchmark against master
gpu-benchmark:
# [TEMPORARY] Test one run before attempting merge
#if: github.event_name != 'pull_request' || github.event.action == 'enqueued'
name: Run fibonacci bench on GPU
runs-on: gpu-bench-a6000
env:
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: compute,utility
EC_GPU_FRAMEWORK: cuda
runs-on: [self-hosted, gpu-bench-t4]
steps:
# Checkout and install deps
- uses: actions/checkout@v4
- uses: actions-rs/toolchain@v1
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@just

# TODO: Factor this out into an action or into justfile, it's used in 4 places
# Set up GPU
# Check we have access to the machine's Nvidia drivers
- run: nvidia-smi
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
- name: Set env for CUDA compute
run: echo "CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')" >> $GITHUB_ENV
- name: Set env for EC_GPU
run: echo 'EC_GPU_CUDA_NVCC_ARGS=--fatbin --gpu-architecture=sm_${{ env.CUDA_ARCH }} --generate-code=arch=compute_${{ env.CUDA_ARCH }},code=sm_${{ env.CUDA_ARCH }}' >> $GITHUB_ENV
- run: echo "${{ env.EC_GPU_CUDA_NVCC_ARGS }}"
# Check that CUDA is installed with a driver-compatible version
# This must also be compatible with the GPU architecture, see above link
- run: nvcc --version

# Run benchmark
- uses: actions/checkout@v4
# Checkout base branch for comparative bench
- uses: actions/checkout@v4
with:
ref: master
- run: ls -a
- name: Set base ref variable
run: echo "BASE_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
# Checkout the justfile and env of the source branch so the base can bench
- run: git restore --source ${{ github.sha }} justfile bench.env
- run: ls -a
# Install dependencies
- uses: actions-rs/toolchain@v1
- uses: Swatinem/rust-cache@v2
- uses: taiki-e/install-action@v2
with:
tool: just@1
# Run benchmark on base branch
- name: Install criterion
run: cargo install cargo-criterion
# TODO: Write this in a commit comment, reject if there's a regression
- name: Run benchmarks
run: just --dotenv-filename bench.env gpu-bench
run: |
cargo install cargo-criterion
cargo install criterion-table
- name: Run GPU bench
run: just --dotenv-filename bench.env gpu-bench fibonacci
# Switch to triggering branch and run benchmark
- run: rm justfile bench.env
- uses: actions/checkout@v4
with:
ref: ${{ github.sha }}
- name: Run GPU bench on source branch
run: just --dotenv-filename bench.env gpu-bench fibonacci
# Create a comparative `criterion-table` and write in commit comment
- name: Run `criterion-table`
run: cat ${{ env.BASE_REF }}.json ${{ github.sha }}.json | criterion-table > BENCHMARKS.md
- name: Write comparative bench on commit comment
uses: peter-evans/commit-comment@v3
with:
body-path: BENCHMARKS.md
# TODO: Use jq for JSON parsing if needed
# Check for benchmark regression based on Criterion's configured noise threshold
- name: Performance regression check
id: check-regression
run: |
echo "regress_count=$(grep -c 'Regressed' ${{ github.sha }}.json)" >> $GITHUB_OUTPUT
# Fail job if regression found
- uses: actions/github-script@v6
if: ${{ steps.check-regression.outputs.regress_count }} > 0
with:
script: |
core.setFailed('Fibonacci bench regression detected')
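The regression check above relies on cargo-criterion's JSON output containing the string "Regressed" when a benchmark slows down beyond the configured noise threshold. A rough local equivalent of the last two steps, with the file name and exit behaviour as illustrative stand-ins for the workflow's, would be:

    # Count regressions reported in the benchmark JSON, then fail if any were found.
    regress_count=$(grep -c 'Regressed' "$GITHUB_SHA.json")
    if [ "$regress_count" -gt 0 ]; then
        echo "Fibonacci bench regression detected" >&2
        exit 1
    fi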
11 changes: 9 additions & 2 deletions bench.env
@@ -1,2 +1,9 @@
LURK_PERF_CONFIG=max-parallel-simple
LURK_RC=100,600
# Lurk config
LURK_PERF=max-parallel-simple
LURK_RC=100,600
LURK_BENCH_NOISE_THRESHOLD=0.10

# CUDA config
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
EC_GPU_FRAMEWORK=cuda
35 changes: 28 additions & 7 deletions benches/fibonacci.rs
@@ -1,5 +1,6 @@
use std::{cell::RefCell, rc::Rc, sync::Arc, time::Duration};

use anyhow::anyhow;
use criterion::{
black_box, criterion_group, criterion_main, measurement, BatchSize, BenchmarkGroup,
BenchmarkId, Criterion, SamplingMode,
@@ -118,24 +119,44 @@ fn fibo_prove<M: measurement::Measurement>(
);
}

fn get_rc_env() -> Vec<usize> {
let rc_env = std::env::var("LURK_RC").unwrap_or("100".into());
rc_env
.split(',')
.filter_map(|rc| rc.parse::<usize>().ok())
.collect()
fn rc_env() -> anyhow::Result<Vec<usize>> {
std::env::var("LURK_RC")
.map_err(|e| anyhow!("Reduction count env var isn't set: {e}"))
.and_then(|rc| {
let vec: anyhow::Result<Vec<usize>> = rc
.split(',')
.map(|rc| {
rc.parse::<usize>()
.map_err(|e| anyhow!("Failed to parse RC: {e}"))
})
.collect();
vec
})
}

fn noise_threshold_env() -> anyhow::Result<f64> {
std::env::var("LURK_BENCH_NOISE_THRESHOLD")
.map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))
.and_then(|nt| {
nt.parse::<f64>()
.map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
})
}

fn fibonacci_prove(c: &mut Criterion) {
tracing_subscriber::fmt::init();
set_bench_config();
tracing::debug!("{:?}", lurk::config::LURK_CONFIG);
let reduction_counts = get_rc_env();

let reduction_counts = rc_env().unwrap_or_else(|_| vec![100]);
tracing::debug!("Fibonacci bench RCs: {:?}", &reduction_counts);
let batch_sizes = [100, 200];

let mut group: BenchmarkGroup<'_, _> = c.benchmark_group("Prove");
group.sampling_mode(SamplingMode::Flat); // This can take a *while*
group.sample_size(10);
group.noise_threshold(noise_threshold_env().unwrap_or(0.05));

let state = State::init_lurk_state().rccell();

for fib_n in batch_sizes.iter() {
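For context, a sketch of how these env vars reach the benchmark when it is run directly; the workflows set the same values through bench.env instead:

    LURK_RC=100,600 LURK_BENCH_NOISE_THRESHOLD=0.10 cargo criterion --bench fibonacci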
42 changes: 29 additions & 13 deletions justfile
@@ -1,19 +1,35 @@
# Install with `cargo install just`
# Usage: `just --dotenv-filename /path/to/file.env <bench|gpu-bench>`
# TODO: Move dotenv-filename into justfile once the feature is available
set dotenv-load

print-lurk-env:
echo $LURK_PERF_CONFIG
echo $LURK_RC
commit := `git rev-parse HEAD`

bench:
print-lurk-env
cargo criterion --bench fibonacci
# Run CPU benchmarks
bench +benches:
#!/bin/sh
printenv LURK
if [ '{{benches}}' != '' ]; then
for bench in {{benches}}; do
cargo criterion --bench $bench
done
else
echo "Invalid input, enter at least one non-empty string"
fi

gpu-bench:
print-lurk-env
cargo criterion --bench fibonacci

deploy-bench:
print-lurk-env
cargo criterion --bench fibonacci --message-format=json > ${{ github.sha }}.json
# Run CUDA benchmarks on GPU
gpu-bench +benches:
#!/bin/sh
# The `compute`/`sm` number corresponds to the Nvidia GPU architecture
# In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
# See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')
export EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH"
env | grep -E "LURK|EC_GPU|CUDA"
if [ '{{benches}}' != '' ]; then
for bench in {{benches}}; do
cargo criterion --bench $bench --features "cuda" --message-format=json 2>&1 > {{commit}}.json
done
else
echo "Invalid input, enter at least one non-empty string"
fi
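The +benches parameter accepts one or more space-separated bench names, so the recipes loop over every target given. Example invocations (fibonacci is the bench used by the workflows above; any other name would be hypothetical):

    just bench fibonacci
    just --dotenv-filename bench.env gpu-bench fibonacci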
