Add bench scripts

iree-org · Aug 30, 2023 · 857af86 · 857af86
1 parent 4cc440b
commit 857af86
Show file tree

Hide file tree

Showing 5 changed files with 174 additions and 0 deletions.
diff --git a/e2e_bench/README.md b/e2e_bench/README.md
@@ -0,0 +1,24 @@
+To run benchmarks:
+
+```sh
+cd e2e_bench
+
+# Fetch models
+./fetch.sh
+
+# Build baseline IREE tools (iree-compile, iree-opt, iree-benchmark-module) and
+# make sure they can be found in PATH
+
+# Run baseline benchmarks
+cd baseline
+./bench_baseline.sh
+cd ..
+
+# Build IREE tools with data-tiling changes in this PR.
+
+# Run batch_matmul data-tiling benchmarks (dt_and_uk sits next to baseline,
+# so enter it directly from e2e_bench)
+cd dt_and_uk
+./bench_dt_and_uk.sh
+cd ..
+```
diff --git a/e2e_bench/baseline/bench_baseline.sh b/e2e_bench/baseline/bench_baseline.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# The script will find iree tools in PATH. To reproduce baseline benchmarks,
+# please build tools at 40794933d45fdbb05d631c9612dc91cc343d1efe.
+
+export MODEL_DIR=..
+
+export IREE_BENCHMARK_MODULE="iree-benchmark-module"
+export TRACE_MODE=0
+
+THREADS=1 ../run.sh | tee run1.log
+THREADS=4 ../run.sh | tee run4.log
+THREADS=8 ../run.sh | tee run8.log
+
+# export IREE_BENCHMARK_MODULE="iree-traced-benchmark-module"
+# export TRACE_MODE=1
+# 
+# THREADS=1 ../run.sh | tee traced_run1.log
diff --git a/e2e_bench/dt_and_uk/bench_dt_and_uk.sh b/e2e_bench/dt_and_uk/bench_dt_and_uk.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# The script will find iree tools in PATH. To reproduce data-tiling benchmarks,
+# please build tools at 4cc440bc3599207828585f4b51b685a1585fe431
+
+export MODEL_DIR=..
+
+export IREE_BENCHMARK_MODULE="iree-benchmark-module"
+export TRACE_MODE=0
+
+THREADS=1 ../run.sh | tee run1.log
+THREADS=4 ../run.sh | tee run4.log
+THREADS=8 ../run.sh | tee run8.log
+
+# export IREE_BENCHMARK_MODULE="iree-traced-benchmark-module"
+# export TRACE_MODE=1
+# 
+# THREADS=1 ../run.sh | tee traced_run1.log
diff --git a/e2e_bench/fetch.sh b/e2e_bench/fetch.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# wget -O EfficientNetV2SPT.mlirbc https://storage.googleapis.com/iree-model-artifacts/pytorch/torch_models_20230321.784_1679461251/EFFICIENTNET_V2_S/batch_1/linalg.mlir &
+# cat<<EOF > EfficientNetV2SPT.mlirbc.run_flag
+# --function=forward
+# --input=1x3x384x384xf32=0
+# EOF
+
+wget -O BertLargeTF_Batch1.mlirbc https://storage.googleapis.com/iree-model-artifacts/tensorflow/manual/BertLargeTF_2023-05-07.timestamp_1683504734.mlirbc &
+cat<<EOF > BertLargeTF_Batch1.mlirbc.run_flag
+--function=serving_default
+--input=1x384xi32=0
+--input=1x384xi32=0
+--input=1x384xi32=0
+EOF
+
+wget -O BertLargeTF_Batch32.mlirbc https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/BERT_LARGE_FP32_TF_384XI32_BATCH32/stablehlo.mlirbc &
+cat<<EOF > BertLargeTF_Batch32.mlirbc.run_flag
+--function=forward
+--input=32x384xi32=0
+--input=32x384xi32=0
+--input=32x384xi32=0
+EOF
+
+wget -O T5LargeTF_Batch1.mlirbc https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/T5_LARGE_FP32_TF_512XI32_BATCH1/stablehlo.mlirbc &
+cat<<EOF > T5LargeTF_Batch1.mlirbc.run_flag
+--function=forward
+--input=1x512xi32=0
+--input=1x512xi32=0
+EOF
+
+wget -O T5LargeTF_Batch32.mlirbc https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/T5_LARGE_FP32_TF_512XI32_BATCH32/stablehlo.mlirbc &
+cat<<EOF > T5LargeTF_Batch32.mlirbc.run_flag
+--function=forward
+--input=32x512xi32=0
+--input=32x512xi32=0
+EOF
+
+wait
diff --git a/e2e_bench/run.sh b/e2e_bench/run.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -xeuo pipefail
+
+IREE_OPT="$(which ${IREE_OPT:-iree-opt})"
+IREE_COMPILE="$(which ${IREE_COMPILER:-iree-compile})"
+IREE_BENCHMARK_MODULE="$(which ${IREE_BENCHMARK_MODULE:-iree-benchmark-module})"
+IREE_TRACY="$(which ${IREE_TRACY:-iree-tracy-capture})"
+TRACE_MODE="${TRACE_MODE:-0}"
+THREADS="${THREADS:-1}"
+PREFIX="${PREFIX:-}"
+MODEL_DIR="${MODEL_DIR:-.}"
+COMP_FLAGS="${COMP_FLAGS:-}"
+
+# for MODEL_PATH in $(ls "${MODEL_DIR}/"*.mlirbc); do
+for MODEL_PATH in $(ls "${MODEL_DIR}"/BertLargeTF_Batch32.mlirbc); do
+  MODEL_FILE="$(basename "${MODEL_PATH}")"
+  echo ">>>> ${MODEL_FILE} <<<<"
+
+  "${IREE_COMPILE}" \
+    "${MODEL_PATH}" \
+    -o "${PREFIX}${MODEL_FILE}.linalg.mlir" \
+    --iree-hal-target-backends=llvm-cpu \
+    --iree-input-type=auto \
+    --iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu \
+    --iree-llvmcpu-target-cpu=cascadelake \
+    --iree-flow-enable-data-tiling \
+    --iree-llvmcpu-enable-microkernels \
+    --compile-to="preprocessing"
+
+  "${IREE_OPT}" --mlir-print-debuginfo "${PREFIX}${MODEL_FILE}.linalg.mlir" > "${PREFIX}${MODEL_FILE}.debug.mlir"
+
+  "${IREE_COMPILE}" \
+    "${PREFIX}${MODEL_FILE}.debug.mlir" \
+    -o "${PREFIX}${MODEL_FILE}.vmfb" \
+    ${COMP_FLAGS} \
+    --iree-hal-target-backends=llvm-cpu \
+    --iree-input-type=auto \
+    --iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu \
+    --iree-llvmcpu-target-cpu=cascadelake \
+    --iree-flow-enable-data-tiling \
+    --iree-llvmcpu-enable-microkernels \
+    --mlir-print-ir-after=iree-flow-outline-dispatch-regions \
+    --mlir-elide-elementsattrs-if-larger=4 2> "${PREFIX}${MODEL_FILE}.dump"
+
+  if (( THREADS == 1 )); then
+    declare -a THREAD_ARGS=(
+      "--device=local-sync"
+    )
+  else
+    declare -a THREAD_ARGS=(
+      "--device=local-task"
+      "--task_topology_max_group_count=${THREADS}"
+    )
+  fi
+
+  RUN_ARGS=($(cat "${MODEL_PATH}.run_flag"))
+
+  if (( TRACE_MODE == 1 )); then
+    "${IREE_TRACY}" -f -o "${PREFIX}${MODEL_FILE}".tracy >/dev/null &
+    REPETITIONS=1
+  else
+    REPETITIONS=5
+  fi
+
+  TRACY_NO_EXIT="${TRACE_MODE}" numactl --cpubind=0 --membind=0 -- \
+    "${IREE_BENCHMARK_MODULE}" \
+    --device_allocator=caching \
+    --benchmark_repetitions="${REPETITIONS}" \
+    --module=${PREFIX}${MODEL_FILE}.vmfb \
+    "${THREAD_ARGS[@]}" \
+    "${RUN_ARGS[@]}"
+
+  wait
+done