From 2066ae46283435546bd9130bde796b744404f7f3 Mon Sep 17 00:00:00 2001
From: Samuel Burnham <45365069+samuelburnham@users.noreply.github.com>
Date: Fri, 12 Jan 2024 09:43:02 -0500
Subject: [PATCH] Refactor Supernova benchmarks and config for CI (#253)

* Refactor Supernova benchmarks and config for CI

* Enable `asm` feature in GPU CI benchmarks

* Clean up and address feedback
---
 .github/PERF_REGRESSION.md            |   2 +-
 .github/workflows/gpu-bench.yml       |  39 ++--
 Cargo.toml                            |   4 +
 benches/common/mod.rs                 |  11 +-
 benches/common/supernova/bench.rs     | 181 +++++++++++++++
 benches/common/supernova/mod.rs       | 168 ++++++++++++++
 benches/common/supernova/targets.rs   |  54 +++++
 benches/compressed-snark-supernova.rs | 310 +-------------------------
 benches/justfile                      |  24 +-
 benches/recursive-snark-supernova.rs  | 267 +---------------------
 benches/supernova-ci.rs               |  31 +++
 11 files changed, 489 insertions(+), 602 deletions(-)
 create mode 100644 benches/common/supernova/bench.rs
 create mode 100644 benches/common/supernova/mod.rs
 create mode 100644 benches/common/supernova/targets.rs
 create mode 100644 benches/supernova-ci.rs

diff --git a/.github/PERF_REGRESSION.md b/.github/PERF_REGRESSION.md
index 090be25bd..dd1c8c59f 100644
--- a/.github/PERF_REGRESSION.md
+++ b/.github/PERF_REGRESSION.md
@@ -2,6 +2,6 @@
 title: ":rotating_light: Performance regression in #{{ env.PR_NUMBER }}"
 labels: P-Performance, automated issue
 ---
-Regression >= {{ env.NOISE_THRESHOLD }} found during merge of: #{{ env.PR_NUMBER }}
+Regression >= {{ env.NOISE_THRESHOLD }}% found during merge of: #{{ env.PR_NUMBER }}
 Commit: {{ env.GIT_SHA }}
 Triggered by: {{ env.WORKFLOW_URL }}
\ No newline at end of file
diff --git a/.github/workflows/gpu-bench.yml b/.github/workflows/gpu-bench.yml
index db826474f..bf3711eff 100644
--- a/.github/workflows/gpu-bench.yml
+++ b/.github/workflows/gpu-bench.yml
@@ -1,4 +1,4 @@
-# Run final tests only when attempting to merge, shown as skipped status checks beforehand
+# Run regression check only when attempting to merge, shown as skipped status check beforehand
 name: GPU benchmark regression test
 
 on:
@@ -39,6 +39,7 @@ jobs:
       - name: Set bench output format and base SHA
         run: |
           echo "ARECIBO_BENCH_OUTPUT=commit-comment" | tee -a $GITHUB_ENV
+          echo "ARECIBO_BENCH_NUM_CONS=16384,1038732" | tee -a $GITHUB_ENV
           echo "BASE_COMMIT=${{ github.event.merge_group.base_sha }}" | tee -a $GITHUB_ENV
           GPU_NAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader,nounits | tail -n1)
           echo "GPU_ID=$(echo $GPU_NAME | awk '{ print $NF }')" | tee -a $GITHUB_ENV
@@ -54,14 +55,14 @@ jobs:
           # Copy justfile to dev, overwriting existing config with that of PR branch
           cp ../benches/justfile .
           # Run benchmark
-          just gpu-bench-ci recursive-snark recursive-snark-supernova compressed-snark compressed-snark-supernova
+          just gpu-bench-ci supernova-ci
           # Copy bench output to PR branch
-          cp *-${{ env.BASE_COMMIT }}.json ..
+          cp supernova-ci-${{ env.BASE_COMMIT }}.json ..
         working-directory: ${{ github.workspace }}/dev
       - name: Run GPU bench on PR branch
         run: |
-          just gpu-bench-ci recursive-snark recursive-snark-supernova compressed-snark compressed-snark-supernova
-          cp *-${{ github.sha }}.json ..
+          just gpu-bench-ci supernova-ci
+          cp supernova-ci-${{ github.sha }}.json ..
         working-directory: ${{ github.workspace }}/benches
       - name: copy the benchmark template and prepare it with data
         run: |
@@ -77,7 +78,7 @@ jobs:
           # Use conditionals to ensure that only non-empty variables are inserted
           [[ ! -z "${{ env.GPU_NAME }}" ]] && sed -i "/^\"\"\"$/i ${{ env.GPU_NAME }}" tables.toml
           [[ ! -z "$CPU_MODEL" ]] && sed -i "/^\"\"\"$/i $CPU_MODEL" tables.toml
-          [[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS" tables.toml
+          [[ ! -z "$NUM_VCPUS" ]] && sed -i "/^\"\"\"$/i $NUM_VCPUS vCPUs" tables.toml
           [[ ! -z "$TOTAL_RAM" ]] && sed -i "/^\"\"\"$/i $TOTAL_RAM" tables.toml          
           sed -i "/^\"\"\"$/i Workflow run: $WORKFLOW_URL" tables.toml
           echo "WORKFLOW_URL=$WORKFLOW_URL" | tee -a $GITHUB_ENV
@@ -85,36 +86,34 @@ jobs:
       # Create a `criterion-table` and write in commit comment
       - name: Run `criterion-table`
         run: |
-          cat recursive-snark-${{ env.BASE_COMMIT }}.json recursive-snark-${{ github.sha }}.json \
-          recursive-snark-supernova-${{ env.BASE_COMMIT }}.json recursive-snark-supernova- ${{ github.sha }}.json \
-          compressed-snark-${{ env.BASE_COMMIT }}.json compressed-snark-${{ github.sha }}.json \
-          compressed-snark-supernova-${{ env.BASE_COMMIT }}.json compressed-snark-supernova- ${{ github.sha }}.json \
-          | criterion-table > BENCHMARKS.md
+          cat supernova-ci-${{ env.BASE_COMMIT }}.json supernova-ci-${{ github.sha }}.json | criterion-table > BENCHMARKS.md
       - name: Write bench on commit comment
         uses: peter-evans/commit-comment@v3
         with:
           body-path: BENCHMARKS.md
-      # Check for a slowdown >= `$ARECIBO_NOISE_THRESHOLD` (fallback is 5%). If so, open an issue but don't block merge
+      # Check for a slowdown >= `$ARECIBO_BENCH_NOISE_THRESHOLD` (fallback is 30%/1.3x). If so, open an issue but don't block merge
+      # Since we are parsing for slowdowns, we simply add 1 to the noise threshold decimal to get the regression factor
       - name: Check for perf regression
         id: regression-check
         run: |
-          REGRESSIONS=$(awk -F'[*x]' '/slower/{print $12}' BENCHMARKS.md)
-          echo $regressions
+          REGRESSIONS=$(grep -o '[0-9.]*x slower' BENCHMARKS.md | cut -d 'x' -f1)
+          echo $REGRESSIONS
 
-          if [ ! -z "${{ env.ARECIBO_NOISE_THRESHOLD}}" ]; then
-            NOISE_THRESHOLD=$(echo "1+${{ env.ARECIBO_NOISE_THRESHOLD }}" | bc)
+          if [ ! -z "${{ env.ARECIBO_BENCH_NOISE_THRESHOLD}}" ]; then
+            REGRESSION_FACTOR=$(echo "${{ env.ARECIBO_BENCH_NOISE_THRESHOLD }}+1" | bc)
           else
-            NOISE_THRESHOLD=1.05
+            REGRESSION_FACTOR=1.3
           fi
 
           for r in $REGRESSIONS
           do
-            if (( $(echo "$r >= $NOISE_THRESHOLD" | bc -l) ))
+            if (( $(echo "$r >= $REGRESSION_FACTOR" | bc -l) ))
             then
               exit 1
             fi
           done
-          echo "NOISE_THRESHOLD=$NOISE_THRESHOLD" | tee -a $GITHUB_ENV
+
+          echo "NOISE_THRESHOLD=$("(REGRESSION_FACTOR-1)*100" | bc) | tee -a $GITHUB_ENV
         continue-on-error: true
       # Not possible to use ${{ github.event.number }} with the `merge_group` trigger
       - name: Get PR number from merge branch
@@ -126,6 +125,6 @@ jobs:
           PR_NUMBER: ${{ env.PR_NUMBER }}
           GIT_SHA: ${{ github.sha }}
           WORKFLOW_URL: ${{ env.WORKFLOW_URL }}
-          NOISE_THRESHOLD: $${{ env.NOISE_THRESHOLD }}
+          NOISE_THRESHOLD: ${{ env.NOISE_THRESHOLD }}
         with:
           filename: .github/PERF_REGRESSION.md
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 06db8a542..ccd423551 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -98,6 +98,10 @@ harness = false
 name = "compressed-snark-supernova"
 harness = false
 
+[[bench]]
+name = "supernova-ci"
+harness = false
+
 [[bench]]
 name = "pcs"
 harness = false
diff --git a/benches/common/mod.rs b/benches/common/mod.rs
index 43ec5ac13..50c566c37 100644
--- a/benches/common/mod.rs
+++ b/benches/common/mod.rs
@@ -1,3 +1,5 @@
+pub mod supernova;
+
 use anyhow::anyhow;
 use criterion::BenchmarkId;
 
@@ -10,7 +12,7 @@ pub(crate) struct BenchParams {
 }
 impl BenchParams {
   pub(crate) fn bench_id(&self, name: &str) -> BenchmarkId {
-    let output_type = bench_output_env().unwrap_or("stdout".into());
+    let output_type = output_type_env().unwrap_or("stdout".into());
     match output_type.as_ref() {
       "pr-comment" => BenchmarkId::new(name, format!("StepCircuitSize-{}", self.step_size)),
       "commit-comment" => BenchmarkId::new(
@@ -29,13 +31,14 @@ impl BenchParams {
   }
 }
 
-fn bench_output_env() -> anyhow::Result<String> {
-  std::env::var("ARECIBO_BENCH_OUTPUT").map_err(|e| anyhow!("Bench output env var isn't set: {e}"))
+fn output_type_env() -> anyhow::Result<String> {
+  std::env::var("ARECIBO_BENCH_OUTPUT")
+    .map_err(|e| anyhow!("ARECIBO_BENCH_OUTPUT env var isn't set: {e}"))
 }
 
 pub(crate) fn noise_threshold_env() -> anyhow::Result<f64> {
   std::env::var("ARECIBO_BENCH_NOISE_THRESHOLD")
-    .map_err(|e| anyhow!("Noise threshold env var isn't set: {e}"))
+    .map_err(|e| anyhow!("ARECIBO_BENCH_NOISE_THRESHOLD env var isn't set: {e}"))
     .and_then(|nt| {
       nt.parse::<f64>()
         .map_err(|e| anyhow!("Failed to parse noise threshold: {e}"))
diff --git a/benches/common/supernova/bench.rs b/benches/common/supernova/bench.rs
new file mode 100644
index 000000000..4ccf243d1
--- /dev/null
+++ b/benches/common/supernova/bench.rs
@@ -0,0 +1,181 @@
+// Code is considered dead unless used in all benchmark targets
+#![allow(dead_code)]
+
+use crate::common::supernova::{
+  num_cons, NonUniformBench, SnarkType, E1, E2, NUM_CONS_VERIFIER_CIRCUIT_PRIMARY, NUM_SAMPLES,
+};
+use crate::common::{noise_threshold_env, BenchParams};
+use arecibo::{
+  provider::{PallasEngine, VestaEngine},
+  supernova::NonUniformCircuit,
+  supernova::TrivialTestCircuit,
+  supernova::{snark::CompressedSNARK, PublicParams, RecursiveSNARK},
+  traits::{
+    snark::RelaxedR1CSSNARKTrait,
+    snark::{default_ck_hint, BatchedRelaxedR1CSSNARKTrait},
+    Engine,
+  },
+};
+use criterion::{measurement::WallTime, *};
+
+/// Benchmarks the SNARK at a provided number of constraints
+///
+/// Parameters
+/// - `num_augmented_circuits`: the number of augmented circuits in this configuration
+/// - `group`: the criterion benchmark group
+/// - `num_cons`: the number of constraints in the step circuit
+pub fn bench_snark_internal_with_arity<
+  S1: BatchedRelaxedR1CSSNARKTrait<E1>,
+  S2: RelaxedR1CSSNARKTrait<E2>,
+>(
+  group: &mut BenchmarkGroup<'_, WallTime>,
+  num_augmented_circuits: usize,
+  num_cons: usize,
+  snark_type: SnarkType,
+) {
+  let bench: NonUniformBench<E1, E2, TrivialTestCircuit<<E2 as Engine>::Scalar>> = match snark_type
+  {
+    SnarkType::Recursive => NonUniformBench::new(2, num_cons),
+    SnarkType::Compressed => NonUniformBench::new(num_augmented_circuits, num_cons),
+  };
+  let pp = match snark_type {
+    SnarkType::Recursive => PublicParams::setup(&bench, &*default_ck_hint(), &*default_ck_hint()),
+    SnarkType::Compressed => PublicParams::setup(&bench, &*S1::ck_floor(), &*S2::ck_floor()),
+  };
+
+  // TODO: Can we use the same number of warmup steps for recursive and compressed?
+  let num_warmup_steps = match snark_type {
+    SnarkType::Recursive => 10,
+    SnarkType::Compressed => 3,
+  };
+  let z0_primary = vec![<E1 as Engine>::Scalar::from(2u64)];
+  let z0_secondary = vec![<E2 as Engine>::Scalar::from(2u64)];
+  let mut recursive_snark_option: Option<RecursiveSNARK<E1, E2>> = None;
+  let mut selected_augmented_circuit = 0;
+
+  for _ in 0..num_warmup_steps {
+    let mut recursive_snark = recursive_snark_option.unwrap_or_else(|| {
+      RecursiveSNARK::new(
+        &pp,
+        &bench,
+        &bench.primary_circuit(0),
+        &bench.secondary_circuit(),
+        &z0_primary,
+        &z0_secondary,
+      )
+      .unwrap()
+    });
+
+    if selected_augmented_circuit == 0 || selected_augmented_circuit == 1 {
+      recursive_snark
+        .prove_step(
+          &pp,
+          &bench.primary_circuit(selected_augmented_circuit),
+          &bench.secondary_circuit(),
+        )
+        .expect("Prove step failed");
+
+      recursive_snark
+        .verify(&pp, &z0_primary, &z0_secondary)
+        .expect("Verify failed");
+    } else {
+      unimplemented!()
+    }
+
+    selected_augmented_circuit = (selected_augmented_circuit + 1) % num_augmented_circuits;
+    recursive_snark_option = Some(recursive_snark)
+  }
+
+  assert!(recursive_snark_option.is_some());
+  let recursive_snark = recursive_snark_option.unwrap();
+
+  let bench_params = BenchParams {
+    step_size: num_cons,
+    date: env!("VERGEN_GIT_COMMIT_DATE"),
+    sha: env!("VERGEN_GIT_SHA"),
+  };
+
+  match snark_type {
+    SnarkType::Compressed => {
+      let (prover_key, verifier_key) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap();
+      // Benchmark the prove time
+      group.bench_function(bench_params.bench_id("Prove"), |b| {
+        b.iter(|| {
+          assert!(CompressedSNARK::<_, _, _, _, S1, S2>::prove(
+            black_box(&pp),
+            black_box(&prover_key),
+            black_box(&recursive_snark)
+          )
+          .is_ok());
+        })
+      });
+
+      let res = CompressedSNARK::<_, _, _, _, S1, S2>::prove(&pp, &prover_key, &recursive_snark);
+      assert!(res.is_ok());
+      let compressed_snark = res.unwrap();
+      // Benchmark the verification time
+      group.bench_function(bench_params.bench_id("Verify"), |b| {
+        b.iter(|| {
+          assert!(black_box(&compressed_snark)
+            .verify(
+              black_box(&pp),
+              black_box(&verifier_key),
+              black_box(&z0_primary),
+              black_box(&z0_secondary),
+            )
+            .is_ok());
+        })
+      });
+    }
+    SnarkType::Recursive => {
+      // Benchmark the prove time
+      group.bench_function(bench_params.bench_id("Prove"), |b| {
+        b.iter(|| {
+          assert!(black_box(&mut recursive_snark.clone())
+            .prove_step(
+              black_box(&pp),
+              &bench.primary_circuit(0),
+              &bench.secondary_circuit()
+            )
+            .is_ok());
+        })
+      });
+
+      // Benchmark the verification time
+      group.bench_function(bench_params.bench_id("Verify"), |b| {
+        b.iter(|| {
+          assert!(black_box(&mut recursive_snark.clone())
+            .verify(
+              black_box(&pp),
+              black_box(&[<PallasEngine as Engine>::Scalar::from(2u64)]),
+              black_box(&[<VestaEngine as Engine>::Scalar::from(2u64)]),
+            )
+            .is_ok());
+        })
+      });
+    }
+  }
+}
+
+pub fn run_bench<S1: BatchedRelaxedR1CSSNARKTrait<E1>, S2: RelaxedR1CSSNARKTrait<E2>>(
+  c: &mut Criterion,
+  group_name: &str,
+  arity: usize,
+  snark_type: SnarkType,
+) {
+  // we vary the number of constraints in the step circuit
+  for &num_cons_in_augmented_circuit in num_cons().iter() {
+    // number of constraints in the step circuit
+    let num_cons = num_cons_in_augmented_circuit
+      .checked_sub(NUM_CONS_VERIFIER_CIRCUIT_PRIMARY)
+      .expect("Negative `num_cons`, constraint numbers out of date!");
+
+    let mut group = c.benchmark_group(group_name);
+    group.sample_size(NUM_SAMPLES);
+    group.noise_threshold(noise_threshold_env().unwrap_or(0.3));
+
+    bench_snark_internal_with_arity::<S1, S2>(&mut group, arity, num_cons, snark_type);
+
+    group.finish();
+  }
+}
diff --git a/benches/common/supernova/mod.rs b/benches/common/supernova/mod.rs
new file mode 100644
index 000000000..64d1b84d0
--- /dev/null
+++ b/benches/common/supernova/mod.rs
@@ -0,0 +1,168 @@
+// Code is considered dead unless used in all benchmark targets
+#![allow(dead_code)]
+
+pub mod bench;
+pub mod targets;
+
+use anyhow::anyhow;
+use arecibo::{
+  supernova::NonUniformCircuit,
+  supernova::{StepCircuit, TrivialTestCircuit},
+  traits::Engine,
+};
+use bellpepper_core::{num::AllocatedNum, ConstraintSystem, SynthesisError};
+use core::marker::PhantomData;
+use ff::PrimeField;
+
+pub type E1 = arecibo::provider::PallasEngine;
+pub type E2 = arecibo::provider::VestaEngine;
+pub type EE1 = arecibo::provider::ipa_pc::EvaluationEngine<E1>;
+pub type EE2 = arecibo::provider::ipa_pc::EvaluationEngine<E2>;
+// SNARKs without computation commitments
+pub type S1 = arecibo::spartan::batched::BatchedRelaxedR1CSSNARK<E1, EE1>;
+pub type S2 = arecibo::spartan::snark::RelaxedR1CSSNARK<E2, EE2>;
+// SNARKs with computation commitments
+pub type SS1 = arecibo::spartan::batched_ppsnark::BatchedRelaxedR1CSSNARK<E1, EE1>;
+pub type SS2 = arecibo::spartan::ppsnark::RelaxedR1CSSNARK<E2, EE2>;
+
+// This should match the value in test_supernova_recursive_circuit_pasta
+// Note `NUM_CONS_VERIFIER_CIRCUIT_PRIMARY` is different for Nova and Supernova
+// TODO: This should also be a table matching the num_augmented_circuits in the below
+pub const NUM_CONS_VERIFIER_CIRCUIT_PRIMARY: usize = 9844;
+pub const NUM_SAMPLES: usize = 10;
+
+#[derive(Copy, Clone)]
+pub enum SnarkType {
+  Recursive,
+  Compressed,
+}
+
+// TODO: Move this up a level to `common/mod.rs`, then integrate with non-Supernova benches
+pub fn num_cons() -> Vec<usize> {
+  num_cons_env().unwrap_or_else(|_| {
+    vec![
+      NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
+      16384,
+      32768,
+      65536,
+      131072,
+      262144,
+      524288,
+      1048576,
+    ]
+  })
+}
+
+/// Parses `ARECIBO_BENCH_NUM_CONS` as a comma-separated list of constraint counts.
+/// Whitespace around each entry is ignored; an unset variable or a bad entry is an error.
+fn num_cons_env() -> anyhow::Result<Vec<usize>> {
+  std::env::var("ARECIBO_BENCH_NUM_CONS")
+    .map_err(|e| anyhow!("ARECIBO_BENCH_NUM_CONS env var not set: {e}"))
+    .and_then(|rc| {
+      rc.split(',')
+        .map(|n| {
+          n.trim().parse::<usize>()
+            .map_err(|e| anyhow!("Failed to parse constraint number: {e}"))
+        })
+        .collect()
+    })
+}
+
+pub struct NonUniformBench<E1, E2, S>
+where
+  E1: Engine<Base = <E2 as Engine>::Scalar>,
+  E2: Engine<Base = <E1 as Engine>::Scalar>,
+  S: StepCircuit<E2::Scalar> + Default,
+{
+  num_circuits: usize,
+  num_cons: usize,
+  _p: PhantomData<(E1, E2, S)>,
+}
+
+impl<E1, E2, S> NonUniformBench<E1, E2, S>
+where
+  E1: Engine<Base = <E2 as Engine>::Scalar>,
+  E2: Engine<Base = <E1 as Engine>::Scalar>,
+  S: StepCircuit<E2::Scalar> + Default,
+{
+  fn new(num_circuits: usize, num_cons: usize) -> Self {
+    Self {
+      num_circuits,
+      num_cons,
+      _p: Default::default(),
+    }
+  }
+}
+
+impl<E1, E2, S>
+  NonUniformCircuit<E1, E2, NonTrivialTestCircuit<E1::Scalar>, TrivialTestCircuit<E2::Scalar>>
+  for NonUniformBench<E1, E2, S>
+where
+  E1: Engine<Base = <E2 as Engine>::Scalar>,
+  E2: Engine<Base = <E1 as Engine>::Scalar>,
+  S: StepCircuit<E2::Scalar> + Default,
+{
+  fn num_circuits(&self) -> usize {
+    self.num_circuits
+  }
+
+  fn primary_circuit(&self, circuit_index: usize) -> NonTrivialTestCircuit<E1::Scalar> {
+    assert!(
+      circuit_index < self.num_circuits,
+      "Circuit index out of bounds: asked for {circuit_index}, but there are only {} circuits.",
+      self.num_circuits
+    );
+
+    NonTrivialTestCircuit::new(self.num_cons)
+  }
+
+  fn secondary_circuit(&self) -> TrivialTestCircuit<E2::Scalar> {
+    Default::default()
+  }
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct NonTrivialTestCircuit<F: PrimeField> {
+  num_cons: usize,
+  _p: PhantomData<F>,
+}
+
+impl<F> NonTrivialTestCircuit<F>
+where
+  F: PrimeField,
+{
+  pub fn new(num_cons: usize) -> Self {
+    Self {
+      num_cons,
+      _p: Default::default(),
+    }
+  }
+}
+impl<F> StepCircuit<F> for NonTrivialTestCircuit<F>
+where
+  F: PrimeField,
+{
+  fn arity(&self) -> usize {
+    1
+  }
+
+  fn circuit_index(&self) -> usize {
+    0
+  }
+
+  fn synthesize<CS: ConstraintSystem<F>>(
+    &self,
+    cs: &mut CS,
+    pc: Option<&AllocatedNum<F>>,
+    z: &[AllocatedNum<F>],
+  ) -> Result<(Option<AllocatedNum<F>>, Vec<AllocatedNum<F>>), SynthesisError> {
+    // Consider an equation: `x^{2^num_cons} = y`, where `x` and `y` are respectively the input and output.
+    let mut x = z[0].clone();
+    let mut y = x.clone();
+    for i in 0..self.num_cons {
+      y = x.square(cs.namespace(|| format!("x_sq_{i}")))?;
+      x = y.clone();
+    }
+    Ok((pc.cloned(), vec![y]))
+  }
+}
diff --git a/benches/common/supernova/targets.rs b/benches/common/supernova/targets.rs
new file mode 100644
index 000000000..43a408812
--- /dev/null
+++ b/benches/common/supernova/targets.rs
@@ -0,0 +1,54 @@
+// Code is considered dead unless used in all benchmark targets
+#![allow(dead_code)]
+use criterion::Criterion;
+
+use crate::common::supernova::{bench::run_bench, SnarkType, S1, S2, SS1, SS2};
+
+// Recursive Supernova SNARK benchmarks
+pub fn bench_one_augmented_circuit_recursive_snark(c: &mut Criterion) {
+  run_bench::<S1, S2>(
+    c,
+    "RecursiveSNARKSuperNova-1circuit",
+    1,
+    SnarkType::Recursive,
+  )
+}
+
+pub fn bench_two_augmented_circuit_recursive_snark(c: &mut Criterion) {
+  run_bench::<S1, S2>(
+    c,
+    "RecursiveSNARKSuperNova-2circuit",
+    2,
+    SnarkType::Recursive,
+  )
+}
+
+// Compressed Supernova SNARK benchmarks
+pub fn bench_one_augmented_circuit_compressed_snark(c: &mut Criterion) {
+  run_bench::<S1, S2>(
+    c,
+    "CompressedSNARKSuperNova-1circuit",
+    1,
+    SnarkType::Compressed,
+  )
+}
+
+pub fn bench_two_augmented_circuit_compressed_snark(c: &mut Criterion) {
+  run_bench::<S1, S2>(
+    c,
+    "CompressedSNARKSuperNova-2circuit",
+    2,
+    SnarkType::Compressed,
+  )
+}
+
+pub fn bench_two_augmented_circuit_compressed_snark_with_computational_commitments(
+  c: &mut Criterion,
+) {
+  run_bench::<SS1, SS2>(
+    c,
+    "CompressedSNARKSuperNova-Commitments-2circuit",
+    2,
+    SnarkType::Compressed,
+  )
+}
diff --git a/benches/compressed-snark-supernova.rs b/benches/compressed-snark-supernova.rs
index 44a4607b6..f9b776894 100644
--- a/benches/compressed-snark-supernova.rs
+++ b/benches/compressed-snark-supernova.rs
@@ -1,29 +1,11 @@
-#![allow(non_snake_case)]
-use arecibo::{
-  supernova::NonUniformCircuit,
-  supernova::{snark::CompressedSNARK, PublicParams, RecursiveSNARK},
-  supernova::{StepCircuit, TrivialTestCircuit},
-  traits::{snark::BatchedRelaxedR1CSSNARKTrait, snark::RelaxedR1CSSNARKTrait, Engine},
-};
-use bellpepper_core::{num::AllocatedNum, ConstraintSystem, SynthesisError};
-use core::marker::PhantomData;
-use criterion::{measurement::WallTime, *};
-use ff::PrimeField;
+use criterion::*;
 use std::time::Duration;
 
 mod common;
-use common::{noise_threshold_env, BenchParams};
-
-type E1 = arecibo::provider::PallasEngine;
-type E2 = arecibo::provider::VestaEngine;
-type EE1 = arecibo::provider::ipa_pc::EvaluationEngine<E1>;
-type EE2 = arecibo::provider::ipa_pc::EvaluationEngine<E2>;
-// SNARKs without computation commitments
-type S1 = arecibo::spartan::batched::BatchedRelaxedR1CSSNARK<E1, EE1>;
-type S2 = arecibo::spartan::snark::RelaxedR1CSSNARK<E2, EE2>;
-// SNARKs with computation commitments
-type SS1 = arecibo::spartan::batched_ppsnark::BatchedRelaxedR1CSSNARK<E1, EE1>;
-type SS2 = arecibo::spartan::ppsnark::RelaxedR1CSSNARK<E2, EE2>;
+use common::supernova::targets::{
+  bench_one_augmented_circuit_compressed_snark, bench_two_augmented_circuit_compressed_snark,
+  bench_two_augmented_circuit_compressed_snark_with_computational_commitments,
+};
 
 // To run these benchmarks, first download `criterion` with `cargo install cargo-criterion`.
 // Then `cargo criterion --bench compressed-snark-supernova`. The results are located in `target/criterion/data/<name-of-benchmark>`.
@@ -46,285 +28,3 @@ cfg_if::cfg_if! {
 }
 
 criterion_main!(compressed_snark_supernova);
-
-// This should match the value in test_supernova_recursive_circuit_pasta
-// TODO: This should also be a table matching the num_augmented_circuits in the below
-const NUM_CONS_VERIFIER_CIRCUIT_PRIMARY: usize = 9844;
-const NUM_SAMPLES: usize = 10;
-
-struct NonUniformBench<E1, E2, S>
-where
-  E1: Engine<Base = <E2 as Engine>::Scalar>,
-  E2: Engine<Base = <E1 as Engine>::Scalar>,
-  S: StepCircuit<E2::Scalar> + Default,
-{
-  num_circuits: usize,
-  num_cons: usize,
-  _p: PhantomData<(E1, E2, S)>,
-}
-
-impl<E1, E2, S> NonUniformBench<E1, E2, S>
-where
-  E1: Engine<Base = <E2 as Engine>::Scalar>,
-  E2: Engine<Base = <E1 as Engine>::Scalar>,
-  S: StepCircuit<E2::Scalar> + Default,
-{
-  fn new(num_circuits: usize, num_cons: usize) -> Self {
-    Self {
-      num_circuits,
-      num_cons,
-      _p: Default::default(),
-    }
-  }
-}
-
-impl<E1, E2, S>
-  NonUniformCircuit<E1, E2, NonTrivialTestCircuit<E1::Scalar>, TrivialTestCircuit<E2::Scalar>>
-  for NonUniformBench<E1, E2, S>
-where
-  E1: Engine<Base = <E2 as Engine>::Scalar>,
-  E2: Engine<Base = <E1 as Engine>::Scalar>,
-  S: StepCircuit<E2::Scalar> + Default,
-{
-  fn num_circuits(&self) -> usize {
-    self.num_circuits
-  }
-
-  fn primary_circuit(&self, circuit_index: usize) -> NonTrivialTestCircuit<E1::Scalar> {
-    assert!(
-      circuit_index < self.num_circuits,
-      "Circuit index out of bounds: asked for {circuit_index}, but there are only {} circuits.",
-      self.num_circuits
-    );
-
-    NonTrivialTestCircuit::new(self.num_cons)
-  }
-
-  fn secondary_circuit(&self) -> TrivialTestCircuit<E2::Scalar> {
-    Default::default()
-  }
-}
-
-/// Benchmarks the compressed SNARK at a provided number of constraints
-///
-/// Parameters
-/// - `num_augmented_circuits`: the number of augmented circuits in this configuration
-/// - `group`: the criterion benchmark group
-/// - `num_cons`: the number of constraints in the step circuit
-fn bench_compressed_snark_internal_with_arity<
-  S1: BatchedRelaxedR1CSSNARKTrait<E1>,
-  S2: RelaxedR1CSSNARKTrait<E2>,
->(
-  group: &mut BenchmarkGroup<'_, WallTime>,
-  num_augmented_circuits: usize,
-  num_cons: usize,
-) {
-  let bench: NonUniformBench<E1, E2, TrivialTestCircuit<<E2 as Engine>::Scalar>> =
-    NonUniformBench::new(num_augmented_circuits, num_cons);
-  let pp = PublicParams::setup(&bench, &*S1::ck_floor(), &*S2::ck_floor());
-
-  let num_steps = 3;
-  let z0_primary = vec![<E1 as Engine>::Scalar::from(2u64)];
-  let z0_secondary = vec![<E2 as Engine>::Scalar::from(2u64)];
-  let mut recursive_snark_option: Option<RecursiveSNARK<E1, E2>> = None;
-  let mut selected_augmented_circuit = 0;
-
-  for _ in 0..num_steps {
-    let mut recursive_snark = recursive_snark_option.unwrap_or_else(|| {
-      RecursiveSNARK::new(
-        &pp,
-        &bench,
-        &bench.primary_circuit(0),
-        &bench.secondary_circuit(),
-        &z0_primary,
-        &z0_secondary,
-      )
-      .unwrap()
-    });
-
-    if selected_augmented_circuit == 0 || selected_augmented_circuit == 1 {
-      let res = recursive_snark.prove_step(
-        &pp,
-        &bench.primary_circuit(selected_augmented_circuit),
-        &bench.secondary_circuit(),
-      );
-      res.expect("Prove step failed");
-
-      let res = recursive_snark.verify(&pp, &z0_primary, &z0_secondary);
-      res.expect("Verify failed");
-    } else {
-      unimplemented!()
-    }
-
-    selected_augmented_circuit = (selected_augmented_circuit + 1) % num_augmented_circuits;
-    recursive_snark_option = Some(recursive_snark)
-  }
-
-  assert!(recursive_snark_option.is_some());
-  let recursive_snark = recursive_snark_option.unwrap();
-
-  let (prover_key, verifier_key) = CompressedSNARK::<_, _, _, _, S1, S2>::setup(&pp).unwrap();
-
-  let bench_params = BenchParams {
-    step_size: num_cons,
-    date: env!("VERGEN_GIT_COMMIT_DATE"),
-    sha: env!("VERGEN_GIT_SHA"),
-  };
-
-  // Benchmark the prove time
-  group.bench_function(bench_params.bench_id("Prove"), |b| {
-    b.iter(|| {
-      assert!(CompressedSNARK::<_, _, _, _, S1, S2>::prove(
-        black_box(&pp),
-        black_box(&prover_key),
-        black_box(&recursive_snark)
-      )
-      .is_ok());
-    })
-  });
-
-  let res = CompressedSNARK::<_, _, _, _, S1, S2>::prove(&pp, &prover_key, &recursive_snark);
-
-  assert!(res.is_ok());
-  let compressed_snark = res.unwrap();
-
-  // Benchmark the verification time
-  group.bench_function(bench_params.bench_id("Verify"), |b| {
-    b.iter(|| {
-      assert!(black_box(&compressed_snark)
-        .verify(
-          black_box(&pp),
-          black_box(&verifier_key),
-          black_box(&z0_primary),
-          black_box(&z0_secondary),
-        )
-        .is_ok());
-    })
-  });
-}
-
-fn bench_one_augmented_circuit_compressed_snark(c: &mut Criterion) {
-  // we vary the number of constraints in the step circuit
-  for &num_cons_in_augmented_circuit in [
-    NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
-    16384,
-    32768,
-    65536,
-    131072,
-    262144,
-    524288,
-    1048576,
-  ]
-  .iter()
-  {
-    // number of constraints in the step circuit
-    let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
-
-    let mut group = c.benchmark_group("CompressedSNARKSuperNova-1circuit");
-    group.sample_size(NUM_SAMPLES);
-    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
-
-    bench_compressed_snark_internal_with_arity::<S1, S2>(&mut group, 1, num_cons);
-
-    group.finish();
-  }
-}
-
-fn bench_two_augmented_circuit_compressed_snark(c: &mut Criterion) {
-  // we vary the number of constraints in the step circuit
-  for &num_cons_in_augmented_circuit in [
-    NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
-    16384,
-    32768,
-    65536,
-    131072,
-    262144,
-    524288,
-    1048576,
-  ]
-  .iter()
-  {
-    // number of constraints in the step circuit
-    let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
-
-    let mut group = c.benchmark_group("CompressedSNARKSuperNova-2circuit");
-    group.sample_size(NUM_SAMPLES);
-    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
-
-    bench_compressed_snark_internal_with_arity::<S1, S2>(&mut group, 2, num_cons);
-
-    group.finish();
-  }
-}
-
-fn bench_two_augmented_circuit_compressed_snark_with_computational_commitments(c: &mut Criterion) {
-  // we vary the number of constraints in the step circuit
-  for &num_cons_in_augmented_circuit in [
-    NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
-    16384,
-    32768,
-    65536,
-    131072,
-    262144,
-    524288,
-    1048576,
-  ]
-  .iter()
-  {
-    // number of constraints in the step circuit
-    let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
-
-    let mut group = c.benchmark_group("CompressedSNARKSuperNova-Commitments-2circuit");
-    group.sample_size(NUM_SAMPLES);
-    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
-
-    bench_compressed_snark_internal_with_arity::<SS1, SS2>(&mut group, 2, num_cons);
-
-    group.finish();
-  }
-}
-#[derive(Clone, Debug, Default)]
-struct NonTrivialTestCircuit<F: PrimeField> {
-  num_cons: usize,
-  _p: PhantomData<F>,
-}
-
-impl<F> NonTrivialTestCircuit<F>
-where
-  F: PrimeField,
-{
-  pub fn new(num_cons: usize) -> Self {
-    Self {
-      num_cons,
-      _p: Default::default(),
-    }
-  }
-}
-impl<F> StepCircuit<F> for NonTrivialTestCircuit<F>
-where
-  F: PrimeField,
-{
-  fn arity(&self) -> usize {
-    1
-  }
-
-  fn circuit_index(&self) -> usize {
-    0
-  }
-
-  fn synthesize<CS: ConstraintSystem<F>>(
-    &self,
-    cs: &mut CS,
-    pc: Option<&AllocatedNum<F>>,
-    z: &[AllocatedNum<F>],
-  ) -> Result<(Option<AllocatedNum<F>>, Vec<AllocatedNum<F>>), SynthesisError> {
-    // Consider a an equation: `x^{2 * num_cons} = y`, where `x` and `y` are respectively the input and output.
-    let mut x = z[0].clone();
-    let mut y = x.clone();
-    for i in 0..self.num_cons {
-      y = x.square(cs.namespace(|| format!("x_sq_{i}")))?;
-      x = y.clone();
-    }
-    Ok((pc.cloned(), vec![y]))
-  }
-}
diff --git a/benches/justfile b/benches/justfile
index 4e74be610..4741c8571 100644
--- a/benches/justfile
+++ b/benches/justfile
@@ -13,24 +13,30 @@ bench +benches:
     cargo criterion --bench $bench
   done
 
-gpu-env: 
+# Run CUDA benchmarks on GPU
+gpu-bench +benches:
+  #!/bin/sh
   # The `compute`/`sm` number corresponds to the Nvidia GPU architecture
   # In this case, the self-hosted machine uses the Ampere architecture, but we want this to be configurable
   # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
-  export CUDA_ARCH := `nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g'`
-  export EC_GPU_CUDA_NVCC_ARGS := "--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH"
-  export EC_GPU_FRAMEWORK := "cuda"
+  export CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | sed 's/\.//g')
+  export EC_GPU_CUDA_NVCC_ARGS="--fatbin --gpu-architecture=sm_$CUDA_ARCH --generate-code=arch=compute_$CUDA_ARCH,code=sm_$CUDA_ARCH"
+  export EC_GPU_FRAMEWORK="cuda"
 
-# Run CUDA benchmarks on GPU
-gpu-bench +benches: gpu-env
-  #!/bin/sh
   for bench in {{benches}}; do
     cargo criterion --bench $bench --features "cuda"
   done
 
-# Run CUDA benchmarks on GPU, tuned for CI
+# Run CUDA benchmarks on GPU, tuned for CI on Linux x86_64
 gpu-bench-ci +benches:
   #!/bin/sh
+  printenv PATH
+  if [ $(uname -m) = "x86_64" ]; then
+    FEATURES="cuda,asm"
+  else
+    FEATURES="cuda"
+  fi
+
   for bench in {{benches}}; do
-    cargo criterion --bench $bench --features "cuda" --message-format=json > "$bench-{{commit}}".json
+    cargo criterion --bench $bench --features $FEATURES --message-format=json > "$bench-{{commit}}".json
   done
\ No newline at end of file
diff --git a/benches/recursive-snark-supernova.rs b/benches/recursive-snark-supernova.rs
index 23b0d4c2b..2fa77c9df 100644
--- a/benches/recursive-snark-supernova.rs
+++ b/benches/recursive-snark-supernova.rs
@@ -1,19 +1,10 @@
-#![allow(non_snake_case)]
-use arecibo::{
-  provider::{PallasEngine, VestaEngine},
-  supernova::NonUniformCircuit,
-  supernova::{PublicParams, RecursiveSNARK},
-  supernova::{StepCircuit, TrivialTestCircuit},
-  traits::{snark::default_ck_hint, Engine},
-};
-use bellpepper_core::{num::AllocatedNum, ConstraintSystem, SynthesisError};
-use core::marker::PhantomData;
-use criterion::{measurement::WallTime, *};
-use ff::PrimeField;
+use criterion::*;
 use std::time::Duration;
 
 mod common;
-use common::{noise_threshold_env, BenchParams};
+use common::supernova::targets::{
+  bench_one_augmented_circuit_recursive_snark, bench_two_augmented_circuit_recursive_snark,
+};
 
 // To run these benchmarks, first download `criterion` with `cargo install cargo-criterion`.
 // Then `cargo criterion --bench recursive-snark-supernova`. The results are located in `target/criterion/data/<name-of-benchmark>`.
@@ -36,253 +27,3 @@ cfg_if::cfg_if! {
 }
 
 criterion_main!(recursive_snark_supernova);
-
-// This should match the value in test_supernova_recursive_circuit_pasta
-// TODO: This should also be a table matching the num_augmented_circuits in the below
-const NUM_CONS_VERIFIER_CIRCUIT_PRIMARY: usize = 9844;
-const NUM_SAMPLES: usize = 10;
-
-struct NonUniformBench<E1, E2, S>
-where
-  E1: Engine<Base = <E2 as Engine>::Scalar>,
-  E2: Engine<Base = <E1 as Engine>::Scalar>,
-  S: StepCircuit<E2::Scalar> + Default,
-{
-  num_circuits: usize,
-  num_cons: usize,
-  _p: PhantomData<(E1, E2, S)>,
-}
-
-impl<E1, E2, S> NonUniformBench<E1, E2, S>
-where
-  E1: Engine<Base = <E2 as Engine>::Scalar>,
-  E2: Engine<Base = <E1 as Engine>::Scalar>,
-  S: StepCircuit<E2::Scalar> + Default,
-{
-  fn new(num_circuits: usize, num_cons: usize) -> Self {
-    Self {
-      num_circuits,
-      num_cons,
-      _p: Default::default(),
-    }
-  }
-}
-
-impl<E1, E2, S>
-  NonUniformCircuit<E1, E2, NonTrivialTestCircuit<E1::Scalar>, TrivialTestCircuit<E2::Scalar>>
-  for NonUniformBench<E1, E2, S>
-where
-  E1: Engine<Base = <E2 as Engine>::Scalar>,
-  E2: Engine<Base = <E1 as Engine>::Scalar>,
-  S: StepCircuit<E2::Scalar> + Default,
-{
-  fn num_circuits(&self) -> usize {
-    self.num_circuits
-  }
-
-  fn primary_circuit(&self, circuit_index: usize) -> NonTrivialTestCircuit<E1::Scalar> {
-    assert!(circuit_index < self.num_circuits);
-
-    NonTrivialTestCircuit::new(self.num_cons)
-  }
-
-  fn secondary_circuit(&self) -> TrivialTestCircuit<E2::Scalar> {
-    Default::default()
-  }
-}
-
-/// Benchmarks the compressed SNARK at a provided number of constraints
-///
-/// Parameters
-/// - `num_augmented_circuits`: the number of augmented circuits in this configuration
-/// - `group`: the criterion benchmark group
-/// - `num_cons`: the number of constraints in the step circuit
-fn bench_recursive_snark_internal_with_arity(
-  group: &mut BenchmarkGroup<'_, WallTime>,
-  num_augmented_circuits: usize,
-  num_cons: usize,
-) {
-  let bench: NonUniformBench<
-    PallasEngine,
-    VestaEngine,
-    TrivialTestCircuit<<VestaEngine as Engine>::Scalar>,
-  > = NonUniformBench::new(2, num_cons);
-  let pp = PublicParams::setup(&bench, &*default_ck_hint(), &*default_ck_hint());
-
-  // Bench time to produce a recursive SNARK;
-  // we execute a certain number of warm-up steps since executing
-  // the first step is cheaper than other steps owing to the presence of
-  // a lot of zeros in the satisfying assignment
-  let num_warmup_steps = 10;
-  let z0_primary = vec![<PallasEngine as Engine>::Scalar::from(2u64)];
-  let z0_secondary = vec![<VestaEngine as Engine>::Scalar::from(2u64)];
-  let mut recursive_snark_option: Option<RecursiveSNARK<PallasEngine, VestaEngine>> = None;
-  let mut selected_augmented_circuit = 0;
-
-  for _ in 0..num_warmup_steps {
-    let mut recursive_snark = recursive_snark_option.unwrap_or_else(|| {
-      RecursiveSNARK::new(
-        &pp,
-        &bench,
-        &bench.primary_circuit(0),
-        &bench.secondary_circuit(),
-        &z0_primary,
-        &z0_secondary,
-      )
-      .unwrap()
-    });
-
-    if selected_augmented_circuit == 0 || selected_augmented_circuit == 1 {
-      recursive_snark
-        .prove_step(
-          &pp,
-          &bench.primary_circuit(selected_augmented_circuit),
-          &bench.secondary_circuit(),
-        )
-        .expect("Prove step failed");
-
-      recursive_snark
-        .verify(&pp, &z0_primary, &z0_secondary)
-        .expect("Verify failed");
-    } else {
-      unimplemented!()
-    }
-
-    selected_augmented_circuit = (selected_augmented_circuit + 1) % num_augmented_circuits;
-    recursive_snark_option = Some(recursive_snark)
-  }
-
-  assert!(recursive_snark_option.is_some());
-  let recursive_snark = recursive_snark_option.unwrap();
-
-  let bench_params = BenchParams {
-    step_size: num_cons,
-    date: env!("VERGEN_GIT_COMMIT_DATE"),
-    sha: env!("VERGEN_GIT_SHA"),
-  };
-
-  // Benchmark the prove time
-  group.bench_function(bench_params.bench_id("Prove"), |b| {
-    b.iter(|| {
-      // produce a recursive SNARK for a step of the recursion
-      assert!(black_box(&mut recursive_snark.clone())
-        .prove_step(
-          black_box(&pp),
-          &bench.primary_circuit(0),
-          &bench.secondary_circuit(),
-        )
-        .is_ok());
-    })
-  });
-
-  // Benchmark the verification time
-  group.bench_function(bench_params.bench_id("Verify"), |b| {
-    b.iter(|| {
-      assert!(black_box(&mut recursive_snark.clone())
-        .verify(
-          black_box(&pp),
-          black_box(&[<PallasEngine as Engine>::Scalar::from(2u64)]),
-          black_box(&[<VestaEngine as Engine>::Scalar::from(2u64)]),
-        )
-        .is_ok());
-    });
-  });
-}
-
-fn bench_one_augmented_circuit_recursive_snark(c: &mut Criterion) {
-  // we vary the number of constraints in the step circuit
-  for &num_cons_in_augmented_circuit in [
-    NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
-    16384,
-    32768,
-    65536,
-    131072,
-    262144,
-    524288,
-    1048576,
-  ]
-  .iter()
-  {
-    // number of constraints in the step circuit
-    let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
-
-    let mut group = c.benchmark_group("RecursiveSNARKSuperNova-1circuit");
-    group.sample_size(NUM_SAMPLES);
-    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
-
-    bench_recursive_snark_internal_with_arity(&mut group, 1, num_cons);
-    group.finish();
-  }
-}
-
-fn bench_two_augmented_circuit_recursive_snark(c: &mut Criterion) {
-  // we vary the number of constraints in the step circuit
-  for &num_cons_in_augmented_circuit in [
-    NUM_CONS_VERIFIER_CIRCUIT_PRIMARY,
-    16384,
-    32768,
-    65536,
-    131072,
-    262144,
-    524288,
-    1048576,
-  ]
-  .iter()
-  {
-    // number of constraints in the step circuit
-    let num_cons = num_cons_in_augmented_circuit - NUM_CONS_VERIFIER_CIRCUIT_PRIMARY;
-
-    let mut group = c.benchmark_group("RecursiveSNARKSuperNova-2circuit");
-    group.sample_size(NUM_SAMPLES);
-    group.noise_threshold(noise_threshold_env().unwrap_or(0.05));
-
-    bench_recursive_snark_internal_with_arity(&mut group, 2, num_cons);
-    group.finish();
-  }
-}
-
-#[derive(Clone, Debug, Default)]
-struct NonTrivialTestCircuit<F: PrimeField> {
-  num_cons: usize,
-  _p: PhantomData<F>,
-}
-
-impl<F> NonTrivialTestCircuit<F>
-where
-  F: PrimeField,
-{
-  pub fn new(num_cons: usize) -> Self {
-    Self {
-      num_cons,
-      _p: Default::default(),
-    }
-  }
-}
-impl<F> StepCircuit<F> for NonTrivialTestCircuit<F>
-where
-  F: PrimeField,
-{
-  fn arity(&self) -> usize {
-    1
-  }
-
-  fn circuit_index(&self) -> usize {
-    0
-  }
-
-  fn synthesize<CS: ConstraintSystem<F>>(
-    &self,
-    cs: &mut CS,
-    pc: Option<&AllocatedNum<F>>,
-    z: &[AllocatedNum<F>],
-  ) -> Result<(Option<AllocatedNum<F>>, Vec<AllocatedNum<F>>), SynthesisError> {
-    // Consider a an equation: `x^2 = y`, where `x` and `y` are respectively the input and output.
-    let mut x = z[0].clone();
-    let mut y = x.clone();
-    for i in 0..self.num_cons {
-      y = x.square(cs.namespace(|| format!("x_sq_{i}")))?;
-      x = y.clone();
-    }
-    Ok((pc.cloned(), vec![y]))
-  }
-}
diff --git a/benches/supernova-ci.rs b/benches/supernova-ci.rs
new file mode 100644
index 000000000..d9f9a9b36
--- /dev/null
+++ b/benches/supernova-ci.rs
@@ -0,0 +1,31 @@
+use criterion::*;
+
+use std::time::Duration;
+
+mod common;
+use common::supernova::targets::{
+  bench_two_augmented_circuit_compressed_snark_with_computational_commitments,
+  bench_two_augmented_circuit_recursive_snark,
+};
+
+// To run these benchmarks, first download `criterion` with `cargo install cargo-criterion`.
+// Then `cargo criterion --bench supernova-ci`. The results are located in `target/criterion/data/<name-of-benchmark>`.
+// For flamegraphs, run `cargo criterion --bench supernova-ci --features flamegraph -- --profile-time <secs>`.
+// The results are located in `target/criterion/profile/<name-of-benchmark>`.
+cfg_if::cfg_if! {
+  if #[cfg(feature = "flamegraph")] {
+    criterion_group! {
+      name = supernova_ci;
+      config = Criterion::default().warm_up_time(Duration::from_millis(3000)).with_profiler(pprof::criterion::PProfProfiler::new(100, pprof::criterion::Output::Flamegraph(None)));
+      targets = bench_two_augmented_circuit_recursive_snark, bench_two_augmented_circuit_compressed_snark_with_computational_commitments
+    }
+  } else {
+    criterion_group! {
+      name = supernova_ci;
+      config = Criterion::default().warm_up_time(Duration::from_millis(3000));
+      targets = bench_two_augmented_circuit_recursive_snark, bench_two_augmented_circuit_compressed_snark_with_computational_commitments
+    }
+  }
+}
+
+criterion_main!(supernova_ci);