From 68400ea74c337a7ef4e9aa67d1afa004fbd4dd9b Mon Sep 17 00:00:00 2001
From: Jerry Wu <cheyuw@google.com>
Date: Wed, 30 Aug 2023 19:11:47 +0000
Subject: [PATCH] Add bench scripts

---
 e2e_bench/baseline/bench_baseline.sh   | 18 +++++++
 e2e_bench/dt_and_uk/bench_dt_and_uk.sh | 18 +++++++
 e2e_bench/fetch.sh                     | 39 ++++++++++++++
 e2e_bench/run.sh                       | 75 ++++++++++++++++++++++++++
 4 files changed, 150 insertions(+)
 create mode 100755 e2e_bench/baseline/bench_baseline.sh
 create mode 100755 e2e_bench/dt_and_uk/bench_dt_and_uk.sh
 create mode 100755 e2e_bench/fetch.sh
 create mode 100755 e2e_bench/run.sh
diff --git a/e2e_bench/baseline/bench_baseline.sh b/e2e_bench/baseline/bench_baseline.sh
new file mode 100755
index 0000000000000..b905ae8b305a7
--- /dev/null
+++ b/e2e_bench/baseline/bench_baseline.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Runs the baseline end-to-end benchmarks through ../run.sh: untraced runs with
+# 1, 4 and 8 threads (logged to run1.log, run4.log and run8.log), then one
+# traced single-thread run (logged to traced_run1.log).
+#
+# The script will find iree tools in PATH. To reproduce baseline benchmarks,
+# please build tools at 40794933d45fdbb05d631c9612dc91cc343d1efe.
+
+# Abort on the first failing run; pipefail keeps `tee` from masking the exit
+# status of run.sh.
+set -euo pipefail
+
+# run.sh, the models and the logs are addressed relative to this directory, so
+# make the script independent of the caller's working directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+export MODEL_DIR=..
+
+export IREE_BENCHMARK_MODULE="iree-benchmark-module"
+export TRACE_MODE=0
+
+for NUM_THREADS in 1 4 8; do
+  THREADS="${NUM_THREADS}" ../run.sh | tee "run${NUM_THREADS}.log"
+done
+
+export IREE_BENCHMARK_MODULE="iree-traced-benchmark-module"
+export TRACE_MODE=1
+
+THREADS=1 ../run.sh | tee traced_run1.log
diff --git a/e2e_bench/dt_and_uk/bench_dt_and_uk.sh b/e2e_bench/dt_and_uk/bench_dt_and_uk.sh
new file mode 100755
index 0000000000000..8567769be8e56
--- /dev/null
+++ b/e2e_bench/dt_and_uk/bench_dt_and_uk.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Runs the data-tiling end-to-end benchmarks through ../run.sh: untraced runs
+# with 1, 4 and 8 threads (logged to run1.log, run4.log and run8.log), then one
+# traced single-thread run (logged to traced_run1.log).
+#
+# The script will find iree tools in PATH. To reproduce data-tiling benchmarks,
+# please build tools at 4cc440bc3599207828585f4b51b685a1585fe431.
+
+# Abort on the first failing run; pipefail keeps `tee` from masking the exit
+# status of run.sh.
+set -euo pipefail
+
+# run.sh, the models and the logs are addressed relative to this directory, so
+# make the script independent of the caller's working directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+export MODEL_DIR=..
+
+export IREE_BENCHMARK_MODULE="iree-benchmark-module"
+export TRACE_MODE=0
+
+for NUM_THREADS in 1 4 8; do
+  THREADS="${NUM_THREADS}" ../run.sh | tee "run${NUM_THREADS}.log"
+done
+
+export IREE_BENCHMARK_MODULE="iree-traced-benchmark-module"
+export TRACE_MODE=1
+
+THREADS=1 ../run.sh | tee traced_run1.log
diff --git a/e2e_bench/fetch.sh b/e2e_bench/fetch.sh
new file mode 100755
index 0000000000000..3774ffbdb7902
--- /dev/null
+++ b/e2e_bench/fetch.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Downloads the benchmark models into the current directory as NAME.mlirbc and
+# writes the matching NAME.mlirbc.run_flag file, which holds one benchmark
+# flag per line. Downloads run in parallel; the script exits non-zero if any
+# of them fails.
+
+set -euo pipefail
+
+# PIDs of the background wget jobs, reaped at the end of the script.
+DOWNLOAD_PIDS=()
+
+#######################################
+# Starts a background download of one model and writes its run flags.
+# Globals:   DOWNLOAD_PIDS (appended)
+# Arguments: $1 - model name, used as the output file stem
+#            $2 - URL of the model
+#            $3.. - benchmark flags, written one per line
+#######################################
+fetch_model() {
+  local name="$1"
+  local url="$2"
+  shift 2
+
+  wget -O "${name}.mlirbc" "${url}" &
+  DOWNLOAD_PIDS+=("$!")
+  printf '%s\n' "$@" > "${name}.mlirbc.run_flag"
+}
+
+# fetch_model EfficientNetV2SPT \
+#   "https://storage.googleapis.com/iree-model-artifacts/pytorch/torch_models_20230321.784_1679461251/EFFICIENTNET_V2_S/batch_1/linalg.mlir" \
+#   --function=forward \
+#   --input=1x3x384x384xf32=0
+
+fetch_model BertLargeTF_Batch1 \
+  "https://storage.googleapis.com/iree-model-artifacts/tensorflow/manual/BertLargeTF_2023-05-07.timestamp_1683504734.mlirbc" \
+  --function=serving_default \
+  --input=1x384xi32=0 \
+  --input=1x384xi32=0 \
+  --input=1x384xi32=0
+
+fetch_model BertLargeTF_Batch32 \
+  "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/BERT_LARGE_FP32_TF_384XI32_BATCH32/stablehlo.mlirbc" \
+  --function=forward \
+  --input=32x384xi32=0 \
+  --input=32x384xi32=0 \
+  --input=32x384xi32=0
+
+fetch_model T5LargeTF_Batch1 \
+  "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/T5_LARGE_FP32_TF_512XI32_BATCH1/stablehlo.mlirbc" \
+  --function=forward \
+  --input=1x512xi32=0 \
+  --input=1x512xi32=0
+
+fetch_model T5LargeTF_Batch32 \
+  "https://storage.googleapis.com/iree-model-artifacts/tensorflow/tf_models_2.15.0.dev20230817_1692333975/T5_LARGE_FP32_TF_512XI32_BATCH32/stablehlo.mlirbc" \
+  --function=forward \
+  --input=32x512xi32=0 \
+  --input=32x512xi32=0
+
+# Wait for each download individually: a bare `wait` always returns 0 and
+# would hide failed downloads.
+STATUS=0
+for PID in "${DOWNLOAD_PIDS[@]}"; do
+  wait "${PID}" || STATUS=1
+done
+exit "${STATUS}"
diff --git a/e2e_bench/run.sh b/e2e_bench/run.sh
new file mode 100755
index 0000000000000..13a8074f6c549
--- /dev/null
+++ b/e2e_bench/run.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+# Compiles and benchmarks IREE models on the llvm-cpu backend.
+#
+# For each selected model the script:
+#   1. runs iree-compile up to the "preprocessing" phase,
+#   2. re-emits that IR with debug info through iree-opt,
+#   3. compiles the result to a .vmfb, saving stderr (including the IR printed
+#      after iree-flow-outline-dispatch-regions) to a .dump file,
+#   4. runs the benchmark module under numactl, with a tracy capture running
+#      in the background when TRACE_MODE=1.
+#
+# Environment variables (all optional):
+#   IREE_OPT               iree-opt binary (default: iree-opt).
+#   IREE_COMPILER          iree-compile binary (default: iree-compile);
+#                          IREE_COMPILE is accepted as an alias.
+#   IREE_BENCHMARK_MODULE  benchmark binary (default: iree-benchmark-module).
+#   IREE_TRACY             capture binary (default: iree-tracy-capture); only
+#                          looked up when TRACE_MODE=1.
+#   TRACE_MODE             1: capture a trace and run 1 repetition; otherwise
+#                          run 5 repetitions without capture (default: 0).
+#   THREADS                1: --device=local-sync; otherwise --device=local-task
+#                          with --task_topology_max_group_count=THREADS
+#                          (default: 1).
+#   PREFIX                 prefix for every generated file (default: empty).
+#   MODEL_DIR              directory holding the .mlirbc models and their
+#                          .run_flag files (default: .).
+#   COMP_FLAGS             extra whitespace-separated iree-compile flags for
+#                          the final compilation (default: none).
+
+set -xeuo pipefail
+
+IREE_OPT="$(command -v "${IREE_OPT:-iree-opt}")"
+# IREE_COMPILER is the historical name; also honor IREE_COMPILE so the variable
+# follows the naming of the other tool overrides.
+IREE_COMPILE="$(command -v "${IREE_COMPILER:-${IREE_COMPILE:-iree-compile}}")"
+IREE_BENCHMARK_MODULE="$(command -v "${IREE_BENCHMARK_MODULE:-iree-benchmark-module}")"
+TRACE_MODE="${TRACE_MODE:-0}"
+THREADS="${THREADS:-1}"
+PREFIX="${PREFIX:-}"
+MODEL_DIR="${MODEL_DIR:-.}"
+COMP_FLAGS="${COMP_FLAGS:-}"
+
+# The capture tool is only needed for traced runs, so do not require it to be
+# installed otherwise.
+if (( TRACE_MODE == 1 )); then
+  IREE_TRACY="$(command -v "${IREE_TRACY:-iree-tracy-capture}")"
+fi
+
+# Only BertLargeTF_Batch32 is benchmarked. To benchmark every model, loop over
+# "${MODEL_DIR}"/*.mlirbc instead.
+for MODEL_PATH in "${MODEL_DIR}"/BertLargeTF_Batch32.mlirbc; do
+  MODEL_FILE="$(basename "${MODEL_PATH}")"
+  RUN_FLAG_FILE="${MODEL_PATH}.run_flag"
+  echo ">>>> ${MODEL_FILE} <<<<"
+
+  # Fail early with a clear message instead of after a long compilation.
+  if [[ ! -f "${MODEL_PATH}" ]]; then
+    echo "error: model not found: ${MODEL_PATH}" >&2
+    exit 1
+  fi
+  if [[ ! -r "${RUN_FLAG_FILE}" ]]; then
+    echo "error: run flag file not readable: ${RUN_FLAG_FILE}" >&2
+    exit 1
+  fi
+
+  "${IREE_COMPILE}" \
+    "${MODEL_PATH}" \
+    -o "${PREFIX}${MODEL_FILE}.linalg.mlir" \
+    --iree-hal-target-backends=llvm-cpu \
+    --iree-input-type=auto \
+    --iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu \
+    --iree-llvmcpu-target-cpu=cascadelake \
+    --iree-flow-enable-data-tiling \
+    --iree-llvmcpu-enable-microkernels \
+    --compile-to="preprocessing"
+
+  "${IREE_OPT}" --mlir-print-debuginfo "${PREFIX}${MODEL_FILE}.linalg.mlir" > "${PREFIX}${MODEL_FILE}.debug.mlir"
+
+  # COMP_FLAGS is intentionally unquoted so that it splits into separate flags.
+  "${IREE_COMPILE}" \
+    "${PREFIX}${MODEL_FILE}.debug.mlir" \
+    -o "${PREFIX}${MODEL_FILE}.vmfb" \
+    ${COMP_FLAGS} \
+    --iree-hal-target-backends=llvm-cpu \
+    --iree-input-type=auto \
+    --iree-llvmcpu-target-triple=x86_64-unknown-linux-gnu \
+    --iree-llvmcpu-target-cpu=cascadelake \
+    --iree-flow-enable-data-tiling \
+    --iree-llvmcpu-enable-microkernels \
+    --mlir-print-ir-after=iree-flow-outline-dispatch-regions \
+    --mlir-elide-elementsattrs-if-larger=4 2> "${PREFIX}${MODEL_FILE}.dump"
+
+  if (( THREADS == 1 )); then
+    declare -a THREAD_ARGS=(
+      "--device=local-sync"
+    )
+  else
+    declare -a THREAD_ARGS=(
+      "--device=local-task"
+      "--task_topology_max_group_count=${THREADS}"
+    )
+  fi
+
+  # Split the flag file on whitespace without glob-expanding the words.
+  # `read` returns non-zero when it hits end of file before finding a NUL
+  # delimiter, which is expected here.
+  RUN_ARGS=()
+  read -r -d '' -a RUN_ARGS < "${RUN_FLAG_FILE}" || true
+
+  if (( TRACE_MODE == 1 )); then
+    "${IREE_TRACY}" -f -o "${PREFIX}${MODEL_FILE}.tracy" >/dev/null &
+    REPETITIONS=1
+  else
+    REPETITIONS=5
+  fi
+
+  # NOTE(review): TRACY_NO_EXIT presumably keeps a traced binary alive until
+  # the capture tool has collected its data; confirm against the Tracy manual.
+  TRACY_NO_EXIT="${TRACE_MODE}" numactl --cpubind=0 --membind=0 -- \
+    "${IREE_BENCHMARK_MODULE}" \
+    --device_allocator=caching \
+    --benchmark_repetitions="${REPETITIONS}" \
+    --module="${PREFIX}${MODEL_FILE}.vmfb" \
+    "${THREAD_ARGS[@]}" \
+    "${RUN_ARGS[@]}"
+
+  # Wait for the background capture, if any, before moving to the next model.
+  wait
+done