Skip to content

Commit

Permalink
Rework special model testing to avoid shared cache interference. (#18344
Browse files Browse the repository at this point in the history
)

This commit reworks the special model testing for sdxl and sd3 so that
different instances generating compiler artifacts don't write to the
same shared cache. Sharing the cache is a major problem because what we
run for each test is no longer consistent. On top of that,
benchmarking was reading from this same cache, which caused a number of
confusing failures. Now, the output artifacts are written to a
location local to each running instance, so jobs should no longer
attempt to write to the same cache.

We could still use a better long-term solution to avoid this problem for
our input artifacts (mlir, weights), but for now we are fine, as they
rarely change and we can be strategic about when we update them.
Hugging Face seems like a good option to move toward.

Fixes #18336

---------

Signed-off-by: saienduri <saimanas.enduri@amd.com>
  • Loading branch information
saienduri authored Aug 24, 2024
1 parent eb694f1 commit cc44a85
Show file tree
Hide file tree
Showing 9 changed files with 129 additions and 65 deletions.
1 change: 1 addition & 0 deletions .github/workflows/pkgci_regression_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ jobs:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
VENV_DIR: ${{ github.workspace }}/venv
TEST_OUTPUT_ARTIFACTS: ${{ github.workspace }}/model_output_artifacts
steps:
- name: Checking out IREE repository
uses: actions/checkout@v4.1.7
Expand Down
28 changes: 16 additions & 12 deletions experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,16 @@
import tabulate
from pytest_check import check

vmfb_dir = os.getenv("TEST_OUTPUT_ARTIFACTS", default=Path.cwd())
benchmark_dir = os.path.dirname(os.path.realpath(__file__))
artifacts_dir = os.getenv("IREE_TEST_FILES", default=Path.cwd()) + "/artifacts"
artifacts_dir = Path(os.path.expanduser(artifacts_dir)).resolve()
prompt_encoder_dir = f"{artifacts_dir}/sdxl_clip"
scheduled_unet_dir = f"{artifacts_dir}/sdxl_unet"
vae_decode_dir = f"{artifacts_dir}/sdxl_vae"
prompt_encoder_dir_compile = f"{vmfb_dir}/sdxl_clip_vmfbs"
scheduled_unet_dir_compile = f"{vmfb_dir}/sdxl_unet_vmfbs"
vae_decode_dir_compile = f"{vmfb_dir}/sdxl_vae_vmfbs"


def run_iree_command(args: Sequence[str] = ()):
Expand Down Expand Up @@ -69,11 +73,11 @@ def run_sdxl_rocm_benchmark(rocm_chip):
"iree-benchmark-module",
f"--device=hip",
"--device_allocator=caching",
f"--module={prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb",
f"--module={prompt_encoder_dir_compile}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={prompt_encoder_dir}/real_weights.irpa",
f"--module={scheduled_unet_dir}/model.rocm_{rocm_chip}.vmfb",
f"--module={scheduled_unet_dir_compile}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={scheduled_unet_dir}/real_weights.irpa",
f"--module={vae_decode_dir}/model.rocm_{rocm_chip}.vmfb",
f"--module={vae_decode_dir_compile}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={vae_decode_dir}/real_weights.irpa",
f"--module={benchmark_dir}/sdxl_full_pipeline_fp16_rocm.vmfb",
"--function=tokens_to_image",
Expand All @@ -95,7 +99,7 @@ def run_sdxl_unet_rocm_benchmark(rocm_chip):
"iree-benchmark-module",
f"--device=hip",
"--device_allocator=caching",
f"--module={scheduled_unet_dir}/model.rocm_{rocm_chip}.vmfb",
f"--module={scheduled_unet_dir_compile}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={scheduled_unet_dir}/real_weights.irpa",
"--function=run_forward",
"--input=1x4x128x128xf16",
Expand All @@ -116,7 +120,7 @@ def run_sdxl_prompt_encoder_rocm_benchmark(rocm_chip):
"iree-benchmark-module",
f"--device=hip",
"--device_allocator=caching",
f"--module={prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb",
f"--module={prompt_encoder_dir_compile}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={prompt_encoder_dir}/real_weights.irpa",
"--function=encode_prompts",
"--input=1x64xi64",
Expand All @@ -135,7 +139,7 @@ def run_sdxl_vae_decode_rocm_benchmark(rocm_chip):
"iree-benchmark-module",
f"--device=hip",
"--device_allocator=caching",
f"--module={vae_decode_dir}/model.rocm_{rocm_chip}.vmfb",
f"--module={vae_decode_dir_compile}/model.rocm_{rocm_chip}.vmfb",
f"--parameters=model={vae_decode_dir}/real_weights.irpa",
"--function=main",
"--input=1x4x128x128xf16",
Expand Down Expand Up @@ -221,7 +225,7 @@ def test_sdxl_rocm_benchmark(
logging.getLogger().info(mean_line)

# unet compilation stats check
with open(f"{scheduled_unet_dir}/compilation_info.json", "r") as file:
with open(f"{scheduled_unet_dir_compile}/compilation_info.json", "r") as file:
comp_stats = json.load(file)
unet_dispatch_count = int(
comp_stats["stream-aggregate"]["execution"]["dispatch-count"]
Expand All @@ -232,7 +236,7 @@ def test_sdxl_rocm_benchmark(
)
logging.getLogger().info(compilation_line)

module_path = f"{scheduled_unet_dir}/model.rocm_{rocm_chip}.vmfb"
module_path = f"{scheduled_unet_dir_compile}/model.rocm_{rocm_chip}.vmfb"
unet_binary_size = Path(module_path).stat().st_size
compilation_line = (
f"Scheduled Unet Binary Size: {unet_binary_size} bytes"
Expand All @@ -250,7 +254,7 @@ def test_sdxl_rocm_benchmark(
logging.getLogger().info(mean_line)

# prompt encoder compilation stats check
with open(f"{prompt_encoder_dir}/compilation_info.json", "r") as file:
with open(f"{prompt_encoder_dir_compile}/compilation_info.json", "r") as file:
comp_stats = json.load(file)
clip_dispatch_count = int(
comp_stats["stream-aggregate"]["execution"]["dispatch-count"]
Expand All @@ -261,7 +265,7 @@ def test_sdxl_rocm_benchmark(
)
logging.getLogger().info(compilation_line)

module_path = f"{prompt_encoder_dir}/model.rocm_{rocm_chip}.vmfb"
module_path = f"{prompt_encoder_dir_compile}/model.rocm_{rocm_chip}.vmfb"
clip_binary_size = Path(module_path).stat().st_size
compilation_line = (
f"Prompt Encoder Binary Size: {clip_binary_size} bytes"
Expand All @@ -279,7 +283,7 @@ def test_sdxl_rocm_benchmark(
logging.getLogger().info(mean_line)

# vae decode compilation stats check
with open(f"{vae_decode_dir}/compilation_info.json", "r") as file:
with open(f"{vae_decode_dir_compile}/compilation_info.json", "r") as file:
comp_stats = json.load(file)
vae_dispatch_count = int(
comp_stats["stream-aggregate"]["execution"]["dispatch-count"]
Expand All @@ -290,7 +294,7 @@ def test_sdxl_rocm_benchmark(
)
logging.getLogger().info(compilation_line)

module_path = f"{vae_decode_dir}/model.rocm_{rocm_chip}.vmfb"
module_path = f"{vae_decode_dir_compile}/model.rocm_{rocm_chip}.vmfb"
vae_binary_size = Path(module_path).stat().st_size
compilation_line = (
f"VAE Decode Binary Size: {vae_binary_size} bytes"
Expand Down
67 changes: 30 additions & 37 deletions experimental/regression_suite/ireers_tools/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pathlib import Path
import subprocess
import time
import os

from .artifacts import (
Artifact,
Expand All @@ -35,60 +36,52 @@ def fetcher() -> FetchedArtifact:
return fetcher


def iree_compile(source: Artifact, compiled_variant: str, flags: Sequence[str]):
name = Path(source.name).with_suffix(f".{compiled_variant}.vmfb")

def callback(vmfb_artifact: ProducedArtifact):
sep = "\n "
print("**************************************************************")
print(f"Compiling {source} -> {vmfb_artifact} with flags:")
print(f" {sep.join(flags)}")
exec_args = (
[
"iree-compile",
"-o",
str(vmfb_artifact.path),
str(source.path),
]
+ IREE_COMPILE_QOL_FLAGS
+ flags
)
start_time = time.time()
subprocess.run(
exec_args, check=True, capture_output=True, cwd=source.group.directory
)
run_time = time.time() - start_time
print(f"Compilation succeeded in {run_time}s")
print("**************************************************************")

return ProducedArtifact(source.group, name, callback, depends=[source]).start()
def iree_compile(source: Artifact, flags: Sequence[str], vmfb_path: Path):
    """Compile ``source`` with ``iree-compile`` and write the result to ``vmfb_path``.

    Args:
        source: Artifact whose ``path`` is passed to ``iree-compile`` as the
            input file.
        flags: Extra command-line flags, appended after
            ``IREE_COMPILE_QOL_FLAGS``. Any sequence of strings is accepted.
        vmfb_path: Output file path. Its parent directory is created if it
            does not exist and is used as the working directory of the
            compiler process.

    Returns:
        ``vmfb_path``, unchanged.

    Raises:
        subprocess.CalledProcessError: If ``iree-compile`` exits with a
            non-zero status.
    """
    # exist_ok avoids the check-then-create race when another process creates
    # the same output directory between the existence test and the mkdir.
    os.makedirs(vmfb_path.parent, exist_ok=True)
    sep = "\n "
    print("**************************************************************")
    print(f" {sep.join(flags)}")
    # Unpack instead of using `+` so `flags` may be any Sequence (e.g. a
    # tuple), as the annotation promises, not only a list.
    exec_args = [
        "iree-compile",
        "-o",
        str(vmfb_path),
        str(source.path),
        *IREE_COMPILE_QOL_FLAGS,
        *flags,
    ]
    print("Exec:", " ".join(exec_args))
    start_time = time.time()
    subprocess.run(exec_args, check=True, capture_output=True, cwd=vmfb_path.parent)
    run_time = time.time() - start_time
    print(f"Compilation succeeded in {run_time}s")
    print("**************************************************************")
    return vmfb_path


def iree_run_module(vmfb: Artifact, *, device, function, args: Sequence[str] = ()):
vmfb.join()
def iree_run_module(vmfb: Path, *, device, function, args: Sequence[str] = ()):
exec_args = [
"iree-run-module",
f"--device={device}",
f"--module={vmfb.path}",
f"--module={vmfb}",
f"--function={function}",
]
exec_args.extend(args)
print("**************************************************************")
print("Exec:", " ".join(exec_args))
subprocess.run(exec_args, check=True, capture_output=True, cwd=vmfb.group.directory)
subprocess.run(exec_args, check=True, capture_output=True, cwd=vmfb.parent)


def iree_benchmark_module(
vmfb: Artifact, *, device, function, args: Sequence[str] = ()
):
vmfb.join()
def iree_benchmark_module(vmfb: Path, *, device, function, args: Sequence[str] = ()):
exec_args = [
"iree-benchmark-module",
f"--device={device}",
f"--module={vmfb.path}",
f"--module={vmfb}",
f"--function={function}",
]
exec_args.extend(args)
print("**************************************************************")
print("Exec:", " ".join(exec_args))
subprocess.check_call(exec_args, cwd=vmfb.group.directory)
subprocess.check_call(exec_args, cwd=vmfb.parent)
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from ireers_tools import *
import os
from conftest import VmfbManager
from pathlib import Path

rocm_chip = os.getenv("ROCM_CHIP", default="gfx90a")
vmfb_dir = os.getenv("TEST_OUTPUT_ARTIFACTS", default=Path.cwd())

###############################################################################
# Fixtures
Expand Down Expand Up @@ -123,7 +125,11 @@ def SD3_CLIP_COMMON_RUN_FLAGS(

def test_compile_clip_cpu(sd3_clip_mlir):
VmfbManager.sd3_clip_cpu_vmfb = iree_compile(
sd3_clip_mlir, "cpu", CPU_COMPILE_FLAGS
sd3_clip_mlir,
CPU_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sd3_clip_vmfbs")
/ Path(sd3_clip_mlir.path.name).with_suffix(f".cpu.vmfb"),
)


Expand Down Expand Up @@ -152,7 +158,11 @@ def test_run_clip_cpu(SD3_CLIP_COMMON_RUN_FLAGS, sd3_clip_real_weights):
)
def test_compile_clip_rocm(sd3_clip_mlir):
VmfbManager.sd3_clip_rocm_vmfb = iree_compile(
sd3_clip_mlir, f"rocm_{rocm_chip}", ROCM_COMPILE_FLAGS
sd3_clip_mlir,
ROCM_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sd3_clip_vmfbs")
/ Path(sd3_clip_mlir.path.name).with_suffix(f".rocm_{rocm_chip}.vmfb"),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

iree_test_path_extension = os.getenv("IREE_TEST_PATH_EXTENSION", default=Path.cwd())
rocm_chip = os.getenv("ROCM_CHIP", default="gfx90a")
vmfb_dir = os.getenv("TEST_OUTPUT_ARTIFACTS", default=Path.cwd())

###############################################################################
# Fixtures
Expand Down Expand Up @@ -107,7 +108,11 @@ def SD3_MMDIT_COMMON_RUN_FLAGS(

def test_compile_mmdit_cpu(sd3_mmdit_mlir):
VmfbManager.sd3_mmdit_cpu_vmfb = iree_compile(
sd3_mmdit_mlir, "cpu", CPU_COMPILE_FLAGS
sd3_mmdit_mlir,
CPU_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sd3_mmdit_vmfbs")
/ Path(sd3_mmdit_mlir.path.name).with_suffix(f".cpu.vmfb"),
)


Expand Down Expand Up @@ -137,7 +142,11 @@ def test_run_mmdit_cpu(SD3_MMDIT_COMMON_RUN_FLAGS, sd3_mmdit_real_weights):
)
def test_compile_mmdit_rocm(sd3_mmdit_mlir):
VmfbManager.sd3_mmdit_rocm_vmfb = iree_compile(
sd3_mmdit_mlir, f"rocm_{rocm_chip}", ROCM_COMPILE_FLAGS
sd3_mmdit_mlir,
ROCM_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sd3_mmdit_vmfbs")
/ Path(sd3_mmdit_mlir.path.name).with_suffix(f".rocm_{rocm_chip}.vmfb"),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from ireers_tools import *
import os
from conftest import VmfbManager
from pathlib import Path

rocm_chip = os.getenv("ROCM_CHIP", default="gfx90a")
vmfb_dir = os.getenv("TEST_OUTPUT_ARTIFACTS", default=Path.cwd())

###############################################################################
# Fixtures
Expand Down Expand Up @@ -77,7 +79,13 @@ def SD3_VAE_COMMON_RUN_FLAGS(


def test_compile_vae_cpu(sd3_vae_mlir):
VmfbManager.sd3_vae_cpu_vmfb = iree_compile(sd3_vae_mlir, "cpu", CPU_COMPILE_FLAGS)
VmfbManager.sd3_vae_cpu_vmfb = iree_compile(
sd3_vae_mlir,
CPU_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sd3_vae_vmfbs")
/ Path(sd3_vae_mlir.path.name).with_suffix(f".cpu.vmfb"),
)


@pytest.mark.depends(on=["test_compile_vae_cpu"])
Expand All @@ -101,7 +109,11 @@ def test_run_vae_cpu(SD3_VAE_COMMON_RUN_FLAGS, sd3_vae_real_weights):

def test_compile_vae_rocm(sd3_vae_mlir):
VmfbManager.sd3_vae_rocm_vmfb = iree_compile(
sd3_vae_mlir, f"rocm_{rocm_chip}", ROCM_COMPILE_FLAGS
sd3_vae_mlir,
ROCM_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sd3_vae_vmfbs")
/ Path(sd3_vae_mlir.path.name).with_suffix(f".rocm_{rocm_chip}.vmfb"),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from ireers_tools import *
import os
from conftest import VmfbManager
from pathlib import Path

rocm_chip = os.getenv("ROCM_CHIP", default="gfx90a")
vmfb_dir = os.getenv("TEST_OUTPUT_ARTIFACTS", default=Path.cwd())

###############################################################################
# Fixtures
Expand Down Expand Up @@ -111,7 +113,11 @@ def SDXL_CLIP_COMMON_RUN_FLAGS(

def test_compile_clip_cpu(sdxl_clip_mlir):
VmfbManager.sdxl_clip_cpu_vmfb = iree_compile(
sdxl_clip_mlir, "cpu", CPU_COMPILE_FLAGS
sdxl_clip_mlir,
CPU_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sdxl_clip_vmfbs")
/ Path(sdxl_clip_mlir.path.name).with_suffix(f".cpu.vmfb"),
)


Expand All @@ -136,7 +142,11 @@ def test_run_clip_cpu(SDXL_CLIP_COMMON_RUN_FLAGS, sdxl_clip_real_weights):

def test_compile_clip_rocm(sdxl_clip_mlir):
VmfbManager.sdxl_clip_rocm_vmfb = iree_compile(
sdxl_clip_mlir, f"rocm_{rocm_chip}", ROCM_COMPILE_FLAGS
sdxl_clip_mlir,
ROCM_COMPILE_FLAGS,
Path(vmfb_dir)
/ Path("sdxl_clip_vmfbs")
/ Path(sdxl_clip_mlir.path.name).with_suffix(f".rocm_{rocm_chip}.vmfb"),
)


Expand Down
Loading

0 comments on commit cc44a85

Please sign in to comment.