diff --git a/.github/workflows/test_cli_cpu_ipex.yaml b/.github/workflows/test_cli_cpu_ipex.yaml index d6b94d3e2..05e23a2a2 100644 --- a/.github/workflows/test_cli_cpu_ipex.yaml +++ b/.github/workflows/test_cli_cpu_ipex.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'ipex') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_ipex') @@ -47,5 +48,10 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,ipex,diffusers,timm] - - name: Run tests - run: pytest tests/test_cli.py -s -k "cli and cpu and ipex" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "cli and cpu and ipex" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -s -k "cpu and ipex" diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml index 05d436833..09b1e40a3 100644 --- a/.github/workflows/test_cli_cpu_llama_cpp.yaml +++ b/.github/workflows/test_cli_cpu_llama_cpp.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'llama_cpp') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_llama_cpp') @@ -47,5 +48,6 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,llama-cpp] - - name: Run tests - run: pytest tests/test_cli.py -s -k "llama_cpp" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "llama_cpp" \ No newline at end of file diff --git a/.github/workflows/test_cli_cpu_neural_compressor.yaml b/.github/workflows/test_cli_cpu_neural_compressor.yaml index 435f4216b..3bfa705b3 100644 --- a/.github/workflows/test_cli_cpu_neural_compressor.yaml +++ b/.github/workflows/test_cli_cpu_neural_compressor.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'neural_compressor') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_neural_compressor') @@ -47,5 +48,10 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,neural-compressor,diffusers,timm] - - name: Run tests - run: pytest tests/test_cli.py -s -k "cli and cpu and neural_compressor" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "cli and cpu and neural_compressor" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -s -k "cpu and neural_compressor" \ No newline at end of file diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml index 21e652358..6e11968d4 100644 --- a/.github/workflows/test_cli_cpu_onnxruntime.yaml +++ 
b/.github/workflows/test_cli_cpu_onnxruntime.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'onnxruntime') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_onnxruntime') @@ -47,5 +48,10 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,onnxruntime,diffusers,timm] - - name: Run tests - run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -s -k "cpu and onnxruntime" \ No newline at end of file diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml index 4612370c2..fafeaa133 100644 --- a/.github/workflows/test_cli_cpu_openvino.yaml +++ b/.github/workflows/test_cli_cpu_openvino.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'openvino') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_openvino') @@ -47,5 +48,10 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,openvino,diffusers,timm] - - name: Run tests - run: pytest tests/test_cli.py -s -k "cli and cpu and openvino" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "cli and cpu and openvino" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -s -k "cpu and openvino" \ No newline at end of file diff --git a/.github/workflows/test_cli_cpu_py_txi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml index d07f61700..5a1cbb773 100644 --- a/.github/workflows/test_cli_cpu_py_txi.yaml +++ b/.github/workflows/test_cli_cpu_py_txi.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'py_txi') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_py_txi') @@ -47,5 +48,10 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,py-txi] - - name: Run tests - run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "cli and cpu and py_txi" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -s -k "examples and cpu and py_txi" \ No newline at end of file diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml index fef2a772d..bb7ae17f8 100644 --- a/.github/workflows/test_cli_cpu_pytorch.yaml +++ b/.github/workflows/test_cli_cpu_pytorch.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || 
(github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cpu') || contains( github.event.pull_request.labels.*.name, 'pytorch') || contains( github.event.pull_request.labels.*.name, 'cli_cpu_pytorch') @@ -47,5 +48,6 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing,diffusers,timm,peft] - - name: Run tests - run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch" + - name: Run cli tests + run: | + pytest tests/test_cli.py -s -k "cli and cpu and pytorch" \ No newline at end of file diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml index 0584665cf..298f37a8e 100644 --- a/.github/workflows/test_cli_cuda_onnxruntime.yaml +++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'onnxruntime') || contains( github.event.pull_request.labels.*.name, 'cli_cuda_onnxruntime') @@ -45,6 +46,6 @@ jobs: run: | pip install -e .[testing,onnxruntime-gpu,diffusers,timm] - - name: Run tests + - name: Run cli tests run: | pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime" diff --git a/.github/workflows/test_cli_cuda_py_txi.yaml b/.github/workflows/test_cli_cuda_py_txi.yaml index 7339b98e3..88c7e1f43 100644 --- a/.github/workflows/test_cli_cuda_py_txi.yaml +++ b/.github/workflows/test_cli_cuda_py_txi.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'py_txi') || contains( github.event.pull_request.labels.*.name, 'cli_cuda_py_txi') @@ -47,5 +48,10 @@ jobs: pip install --upgrade pip pip install -e .[testing,py-txi] - - name: Run tests - run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi" + - name: Run cli tests + run: | + pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -x -s -k "cuda and py_txi" \ No newline at end of file diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml index 0bc5dfaf6..dd49280b0 100644 --- a/.github/workflows/test_cli_cuda_pytorch.yaml +++ b/.github/workflows/test_cli_cuda_pytorch.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'pytorch') || contains( github.event.pull_request.labels.*.name, 'single_gpu') || @@ -43,13 +44,21 @@ jobs: uses: actions/checkout@v4 - name: Install dependencies + env: + LOG_LEVEL: ERROR run: | - pip install -e 
.[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq] + pip install -e .[testing,diffusers,timm,peft,bitsandbytes,autoawq,auto-gptq,codecarbon] - - name: Run tests + - name: Run cli tests run: | pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)" + - name: Run tests from example folder + env: + LOG_LEVEL: ERROR + run: | + pytest tests/test_examples.py -x -s -k "cuda and pytorch" + run_cli_cuda_pytorch_multi_gpu_tests: if: ${{ (github.event_name == 'push') || @@ -76,10 +85,16 @@ jobs: run: | pip install -e .[testing,diffusers,timm,peft,deepspeed] - - name: Run tests (parallel) + - name: Run cli tests (parallel) run: | pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)" - - name: Run tests (sequential) + - name: Run cli tests (sequential) run: | FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)" + + - name: Run tests from example folder + env: + LOG_LEVEL: ERROR + run: | + FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cuda and pytorch" diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml index acb04fe26..9e2cbe05d 100644 --- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml +++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'tensorrt_llm') || contains( github.event.pull_request.labels.*.name, 'single_gpu') || @@ -46,10 +47,14 @@ jobs: run: | pip install -e .[testing] - - name: Run tests + - name: Run cli tests run: | pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)" + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -x -s -k "cuda and tensorrt_llm" + cli_cuda_tensorrt_llm_multi_gpu_tests: if: ${{ (github.event_name == 'push') || @@ -76,6 +81,7 @@ jobs: run: | pip install -e .[testing] - - name: Run tests (sequential) + - name: Run cli tests (sequential) run: | FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and (tp or pp)" + diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml index ee886e8c5..5f356d185 100644 --- a/.github/workflows/test_cli_cuda_torch_ort.yaml +++ b/.github/workflows/test_cli_cuda_torch_ort.yaml @@ -56,6 +56,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'torch_ort') || contains( github.event.pull_request.labels.*.name, 'multi_gpu') || @@ -78,6 +79,6 @@ jobs: pip install -e .[testing,torch-ort,peft] pip install optimum@git+https://github.com/huggingface/optimum.git - - name: Run tests + - name: Run cli tests run: | - pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map)" + pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and (dp or ddp or device_map)" \ No newline at end of file diff --git 
a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml index 732513d27..53edd9a31 100644 --- a/.github/workflows/test_cli_cuda_vllm.yaml +++ b/.github/workflows/test_cli_cuda_vllm.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'vllm') || contains( github.event.pull_request.labels.*.name, 'single_gpu') || @@ -49,12 +50,17 @@ jobs: - name: Run tests (sequential) run: | FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)" + + - name: Run tests from example folder + run: | + pytest tests/test_examples.py -x -s -k "cuda and vllm" run_cli_cuda_vllm_multi_gpu_tests: if: ${{ (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'cuda') || contains( github.event.pull_request.labels.*.name, 'vllm') || contains( github.event.pull_request.labels.*.name, 'multi_gpu') || @@ -76,6 +82,10 @@ jobs: run: | pip install -e .[testing] - - name: Run tests (sequential) + - name: Run cli tests (sequential) run: | FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and (tp or pp)" + + - name: Run tests from example folder + run: | + FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cuda and vllm" \ No newline at end of file diff --git a/.github/workflows/test_cli_misc.yaml b/.github/workflows/test_cli_misc.yaml index fb699a883..7d0557d16 100644 --- a/.github/workflows/test_cli_misc.yaml +++ b/.github/workflows/test_cli_misc.yaml @@ -60,6 +60,6 @@ jobs: pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu pip install -e .[testing] - - name: Run tests + - name: Run cli tests run: | - pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)" + pytest tests/test_cli.py -s -k "cli and not (cpu or cuda or rocm or mps)" \ No newline at end of file diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml index a212da5b4..607b1ff03 100644 --- a/.github/workflows/test_cli_rocm_pytorch.yaml +++ b/.github/workflows/test_cli_rocm_pytorch.yaml @@ -25,6 +25,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'rocm') || contains( github.event.pull_request.labels.*.name, 'pytorch') || contains( github.event.pull_request.labels.*.name, 'single_gpu') || @@ -49,7 +50,7 @@ jobs: run: | pip install -e .[testing,diffusers,timm,peft,autoawq,auto-gptq] - - name: Run tests + - name: Run cli tests run: | pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed) and not bnb" @@ -58,6 +59,7 @@ jobs: (github.event_name == 'push') || (github.event_name == 'workflow_dispatch') || contains( github.event.pull_request.labels.*.name, 'cli') || + contains( github.event.pull_request.labels.*.name, 'examples') || contains( github.event.pull_request.labels.*.name, 'rocm') || contains( 
github.event.pull_request.labels.*.name, 'pytorch') || contains( github.event.pull_request.labels.*.name, 'multi_gpu') || @@ -82,10 +84,10 @@ jobs: run: | pip install -e .[testing,diffusers,timm,deepspeed,peft,autoawq,auto-gptq] "deepspeed<0.15" - - name: Run tests (parallel) + - name: Run cli tests (parallel) run: | pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (dp or ddp or device_map)" - - name: Run tests (sequential) + - name: Run cli tests (sequential) run: | FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and (deepspeed_inference)" diff --git a/Makefile b/Makefile index 0e14a5db3..3a3b34fd4 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # List of targets that are not associated with files -.PHONY: quality style install build_cpu_image build_cuda_image build_cuda_ort_image build_rocm_image run_cpu_container run_cuda_container run_cuda_ort_container run_rocm_container install_api_misc install_api_cpu install_api_cuda install_api_rocm install_cli_misc install_cli_cpu_pytorch install_cli_cpu_openvino install_cli_cpu_onnxruntime install_cli_cpu_neural_compressor install_cli_cuda_pytorch install_cli_rocm_pytorch install_cli_cuda_torch_ort install_cli_cuda_onnxruntime test_api_misc test_api_cpu test_api_cuda test_api_rocm test_cli_misc test_cli_cpu_pytorch test_cli_cpu_openvino test_cli_cpu_onnxruntime test_cli_cpu_neural_compressor test_cli_cuda_onnxruntime test_cli_cuda_vllm test_cli_cuda_pytorch_multi_gpu test_cli_cuda_pytorch_single_gpu test_cli_cuda_torch_ort_multi_gpu test_cli_cuda_torch_ort_single_gpu test_cli_rocm_pytorch_multi_gpu test_cli_rocm_pytorch_single_gpu install_llm_perf_cuda_pytorch run_llm_perf_cuda_pytorch_unquantized run_llm_perf_cuda_pytorch_bnb run_llm_perf_cuda_pytorch_gptq run_llm_perf_cuda_pytorch_awq +.PHONY: quality style install build_cpu_image build_cuda_image build_cuda_ort_image build_rocm_image run_cpu_container run_cuda_container run_cuda_ort_container run_rocm_container run_trt_container install_api_misc install_api_cpu install_api_cuda install_api_rocm install_cli_misc install_cli_cpu_pytorch install_cli_cpu_openvino install_cli_cpu_onnxruntime install_cli_cpu_neural_compressor install_cli_cuda_pytorch install_cli_rocm_pytorch install_cli_cuda_torch_ort install_cli_cuda_onnxruntime test_api_misc test_api_cpu test_api_cuda test_api_rocm test_cli_misc test_cli_cpu_pytorch test_cli_cpu_openvino test_cli_cpu_onnxruntime test_cli_cpu_neural_compressor test_cli_cuda_onnxruntime test_cli_cuda_vllm test_cli_cuda_pytorch_multi_gpu test_cli_cuda_pytorch_single_gpu test_cli_cuda_torch_ort_multi_gpu test_cli_cuda_torch_ort_single_gpu test_cli_rocm_pytorch_multi_gpu test_cli_rocm_pytorch_single_gpu install_llm_perf_cuda_pytorch run_llm_perf_cuda_pytorch_unquantized run_llm_perf_cuda_pytorch_bnb run_llm_perf_cuda_pytorch_gptq run_llm_perf_cuda_pytorch_awq PWD := $(shell pwd) USER_ID := $(shell id -u) @@ -78,6 +78,29 @@ run_rocm_container: --workdir /optimum-benchmark \ optimum-benchmark:latest-rocm +run_trt_container: + docker run \ + -it \ + --rm \ + --pid host \ + --ipc host \ + --gpus all \ + --shm-size 64G \ + --volume $(PWD):/optimum-benchmark \ + --workdir /optimum-benchmark \ + huggingface/optimum-nvidia:latest + +run_cloud_cuda_container: + docker run \ + -it \ + --rm \ + --pid host \ + --gpus all \ + --shm-size 64G \ + --volume $(PWD):/optimum-benchmark \ + --workdir /optimum-benchmark \ + ghcr.io/huggingface/optimum-benchmark:latest-cuda + ## Install extras install_api_misc: diff --git 
a/examples/ipex_bert.yaml b/examples/ipex_bert.yaml index e549da0a4..8ff2edf4f 100644 --- a/examples/ipex_bert.yaml +++ b/examples/ipex_bert.yaml @@ -25,5 +25,4 @@ backend: device: cpu no_weights: false export: true - torch_dtype: bfloat16 model: bert-base-uncased diff --git a/examples/ipex_llama.yaml b/examples/ipex_llama.yaml index b564316b8..f01f49fdd 100644 --- a/examples/ipex_llama.yaml +++ b/examples/ipex_llama.yaml @@ -33,5 +33,4 @@ backend: device: cpu export: true no_weights: false - torch_dtype: bfloat16 model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 diff --git a/examples/pytorch_bert.py b/examples/pytorch_bert.py index 09f62b8d5..00e0a9c1d 100644 --- a/examples/pytorch_bert.py +++ b/examples/pytorch_bert.py @@ -1,10 +1,14 @@ import os +import warnings from huggingface_hub import whoami from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig from optimum_benchmark.logging_utils import setup_logging +if os.environ.get("LOG_LEVEL", "INFO") == "ERROR": + warnings.filterwarnings("ignore") # This disables all warnings + try: USERNAME = whoami()["name"] except Exception as e: diff --git a/examples/pytorch_bert_mps.yaml b/examples/pytorch_bert_mps.yaml index 4d4dc6e35..b17c8ae74 100644 --- a/examples/pytorch_bert_mps.yaml +++ b/examples/pytorch_bert_mps.yaml @@ -21,6 +21,4 @@ scenario: backend: device: cpu no_weights: true - model: bert-base-uncased - - + model: bert-base-uncased \ No newline at end of file diff --git a/examples/pytorch_llama.py b/examples/pytorch_gpt2.py similarity index 89% rename from examples/pytorch_llama.py rename to examples/pytorch_gpt2.py index 90c099317..8d66a04cd 100644 --- a/examples/pytorch_llama.py +++ b/examples/pytorch_gpt2.py @@ -1,17 +1,21 @@ import os +import warnings from huggingface_hub import whoami from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig from optimum_benchmark.logging_utils import setup_logging +if os.environ.get("LOG_LEVEL", "INFO") == "ERROR": + warnings.filterwarnings("ignore") # This disables all warnings + try: USERNAME = whoami()["name"] except Exception as e: print(f"Failed to get username from Hugging Face Hub: {e}") USERNAME = None -BENCHMARK_NAME = "pytorch-llama" +BENCHMARK_NAME = "pytorch-gpt2" WEIGHTS_CONFIGS = { "float16": { @@ -27,7 +31,7 @@ "4bit-gptq-exllama-v2": { "torch_dtype": "float16", "quantization_scheme": "gptq", - "quantization_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256}, + "quantization_config": {"bits": 4, "use_exllama": True, "version": 2, "model_seqlen": 256}, }, } diff --git a/examples/pytorch_llama.yaml b/examples/pytorch_gpt2.yaml similarity index 92% rename from examples/pytorch_llama.yaml rename to examples/pytorch_gpt2.yaml index becd1f2e4..cf69833e7 100644 --- a/examples/pytorch_llama.yaml +++ b/examples/pytorch_gpt2.yaml @@ -6,7 +6,7 @@ defaults: - _base_ - _self_ -name: pytorch_llama +name: pytorch_gpt2 launcher: device_isolation: true @@ -15,6 +15,7 @@ launcher: backend: model: gpt2 device: cuda + device_ids: 0 torch_dtype: float16 scenario: diff --git a/examples/tei_bge.yaml b/examples/tei_bge.yaml index dbbab7d51..f74349d3f 100644 --- a/examples/tei_bge.yaml +++ b/examples/tei_bge.yaml @@ -3,14 +3,11 @@ defaults: - scenario: inference - launcher: inline - backend: py-txi + - _base_ - _self_ name: tei_bert -launcher: - device_isolation: true - device_isolation_action: warn - backend: device: cpu model: BAAI/bge-base-en-v1.5 diff --git a/examples/tgi_llama.yaml 
b/examples/tgi_llama.yaml index 399667fbf..ffbe8556a 100644 --- a/examples/tgi_llama.yaml +++ b/examples/tgi_llama.yaml @@ -14,7 +14,7 @@ launcher: backend: device: cuda - device_ids: 4 + device_ids: all # no_weights: true model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 diff --git a/examples/trt_llama.yaml b/examples/trt_llama.yaml index 30cb600a4..1aa5ecc26 100644 --- a/examples/trt_llama.yaml +++ b/examples/trt_llama.yaml @@ -15,7 +15,6 @@ launcher: backend: device: cuda device_ids: 0 - no_weights: true model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 scenario: diff --git a/optimum_benchmark/backends/tensorrt_llm/config.py b/optimum_benchmark/backends/tensorrt_llm/config.py index d7f4b1cbb..d3512b0ee 100644 --- a/optimum_benchmark/backends/tensorrt_llm/config.py +++ b/optimum_benchmark/backends/tensorrt_llm/config.py @@ -24,7 +24,7 @@ class TRTLLMConfig(BackendConfig): world_size: int = 1 gpus_per_node: int = 1 - max_prompt_length: int = 128 + max_prompt_length: int = 256 max_new_tokens: int = -1 max_batch_size: int = 1 max_beam_width: int = 1 diff --git a/optimum_benchmark/backends/vllm/backend.py b/optimum_benchmark/backends/vllm/backend.py index e90f3e7ed..1616f1cb6 100644 --- a/optimum_benchmark/backends/vllm/backend.py +++ b/optimum_benchmark/backends/vllm/backend.py @@ -124,7 +124,6 @@ def batch_offline_engine_generate(self, inputs: Dict[str, Any], kwargs: Dict[str n=kwargs.get("num_return_sequences"), max_tokens=kwargs.get("max_new_tokens"), min_tokens=kwargs.get("min_new_tokens"), - use_beam_search=kwargs.get("num_beams") > 1, logits_processors=kwargs.get("logits_processors", None), ), ) @@ -143,7 +142,6 @@ async def single_online_engine_generate(self, prompt: str, request_id: str, kwar n=kwargs.get("num_return_sequences"), max_tokens=kwargs.get("max_new_tokens"), min_tokens=kwargs.get("min_new_tokens"), - use_beam_search=kwargs.get("num_beams") > 1, logits_processors=kwargs.get("logits_processors", None), ), ) diff --git a/optimum_benchmark/scenarios/energy_star/scenario.py b/optimum_benchmark/scenarios/energy_star/scenario.py index 3bf003ff9..db2ceea68 100644 --- a/optimum_benchmark/scenarios/energy_star/scenario.py +++ b/optimum_benchmark/scenarios/energy_star/scenario.py @@ -7,7 +7,7 @@ from tqdm import tqdm from ...backends.base import Backend, BackendConfigT -from ...benchmark.report import BenchmarkMeasurements, BenchmarkReport +from ...benchmark.report import BenchmarkReport, TargetMeasurements from ...import_utils import is_torch_distributed_available from ...task_utils import IMAGE_DIFFUSION_TASKS, TEXT_GENERATION_TASKS from ...trackers.energy import Efficiency, EnergyTracker @@ -61,22 +61,22 @@ @dataclass class TextGenerationReport(BenchmarkReport): - preprocess: BenchmarkMeasurements - per_token: BenchmarkMeasurements - prefill: BenchmarkMeasurements - decode: BenchmarkMeasurements + preprocess: TargetMeasurements + per_token: TargetMeasurements + prefill: TargetMeasurements + decode: TargetMeasurements @dataclass class ImageDiffusionReport(BenchmarkReport): - preprocess: BenchmarkMeasurements - call: BenchmarkMeasurements + preprocess: TargetMeasurements + call: TargetMeasurements @dataclass class InferenceReport(BenchmarkReport): - preprocess: BenchmarkMeasurements - forward: BenchmarkMeasurements + preprocess: TargetMeasurements + forward: TargetMeasurements class EnergyStarScenario(Scenario[EnergyStarConfig]): @@ -94,7 +94,9 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: ) self.config.input_shapes["batch_size"] //= torch.distributed.get_world_size() - 
self.energy_tracker = EnergyTracker(device=backend.config.device, device_ids=backend.config.device_ids) + self.energy_tracker = EnergyTracker( + device=backend.config.device, device_ids=backend.config.device_ids, backend=backend.NAME + ) LOGGER.info("\t+ Loading dataset") raw_dataset = load_dataset( @@ -112,7 +114,6 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: preprocessor=backend.pretrained_processor, ) self.preprocessing_energy = self.energy_tracker.get_energy() - self.energy_tracker.reset() LOGGER.info("\t+ Initialising dataloader") self.dataloader = DataLoader(self.dataset, batch_size=self.config.input_shapes["batch_size"]) @@ -122,36 +123,26 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: self.config.generate_kwargs = {**TEXT_GENERATION_WARMUP_OVERRIDES, **self.config.generate_kwargs} LOGGER.info("\t+ Initializing Text Generation report") self.report = TextGenerationReport( - preprocess=BenchmarkMeasurements(), - per_token=BenchmarkMeasurements(), - prefill=BenchmarkMeasurements(), - decode=BenchmarkMeasurements(), + preprocess=TargetMeasurements(), + per_token=TargetMeasurements(), + prefill=TargetMeasurements(), + decode=TargetMeasurements(), ) elif backend.config.task in IMAGE_DIFFUSION_TASKS: LOGGER.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_WARMUP_OVERRIDES, **self.config.call_kwargs} LOGGER.info("\t+ Initializing Image Diffusion report") - self.report = ImageDiffusionReport(preprocess=BenchmarkMeasurements(), call=BenchmarkMeasurements()) + self.report = ImageDiffusionReport(preprocess=TargetMeasurements(), call=TargetMeasurements()) else: LOGGER.info("\t+ Initializing Inference report") - self.report = InferenceReport(preprocess=BenchmarkMeasurements(), forward=BenchmarkMeasurements()) + self.report = InferenceReport(preprocess=TargetMeasurements(), forward=TargetMeasurements()) self.report.preprocess.energy = self.preprocessing_energy self.report.preprocess.efficiency = Efficiency.from_energy( self.report.preprocess.energy, self.inference_volume, unit=INFERENCE_EFFICIENCY_UNIT ) - LOGGER.info("\t+ Preparing backend for Inference") - backend.prepare_for_inference( - input_shapes=self.config.input_shapes, - inference_kwargs={ - **self.config.generate_kwargs, - **self.config.forward_kwargs, - **self.config.call_kwargs, - }, - ) - LOGGER.info("\t+ Warming up backend for Inference") warmup_inputs = backend.prepare_inputs(next(iter(self.dataloader))) for _ in range(self.config.warmup_runs): @@ -195,7 +186,6 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.prefill.efficiency = Efficiency.from_energy( self.report.prefill.energy, self.text_generation_prefill_volume, unit=TEXT_GENERATION_EFFICIENCY_UNIT ) - self.energy_tracker.reset() with self.energy_tracker.track(): for inputs in tqdm(self.dataloader): @@ -205,7 +195,6 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.decode.efficiency = Efficiency.from_energy( self.report.decode.energy, self.text_generation_decode_volume, unit=TEXT_GENERATION_EFFICIENCY_UNIT ) - self.energy_tracker.reset() def run_image_diffusion_energy_tracking(self, backend: Backend[BackendConfigT]): LOGGER.info("\t+ Running energy tracking") @@ -219,7 +208,6 @@ def run_image_diffusion_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.call.efficiency = Efficiency.from_energy( self.report.call.energy, self.image_diffusion_volume, 
unit=IMAGE_DIFFUSION_EFFICIENCY_UNIT
        )
-        self.energy_tracker.reset()

     def run_inference_energy_tracking(self, backend: Backend[BackendConfigT]):
         LOGGER.info("\t+ Running energy tracking")
@@ -233,7 +221,6 @@ def run_inference_energy_tracking(self, backend: Backend[BackendConfigT]):
         self.report.forward.efficiency = Efficiency.from_energy(
             self.report.forward.energy, self.inference_volume, unit=INFERENCE_EFFICIENCY_UNIT
         )
-        self.energy_tracker.reset()

     @property
     def inference_volume(self) -> int:  # in samples
diff --git a/tests/test_examples.py b/tests/test_examples.py
new file mode 100644
index 000000000..dc9532403
--- /dev/null
+++ b/tests/test_examples.py
@@ -0,0 +1,236 @@
+import os
+import subprocess
+import sys
+from logging import getLogger
+from pathlib import Path
+
+import pytest
+import yaml
+
+from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output
+
+LOGGER = getLogger("test-example")
+
+EXAMPLES_DIR = Path(__file__).parent.parent / "examples"
+OUTPUT_DIR = Path(__file__).parent.parent / "runs"
+YAML_CONFIGS = [f for f in os.listdir(EXAMPLES_DIR) if f.endswith(".yaml") and f != "_base_.yaml"]
+PYTHON_SCRIPTS = [f for f in os.listdir(EXAMPLES_DIR) if f.endswith(".py")]
+
+# can be run with pytest tests/test_examples.py -s -k "cpu and ipex"
+CPU_IPEX_CONFIGS = [
+    "ipex_bert.yaml",
+    "ipex_llama.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cpu and neural_compressor"
+CPU_NEURAL_COMPRESSOR_CONFIGS = [
+    "neural_compressor_ptq_bert.yaml",
+    "numactl_bert.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cpu and onnxruntime"
+CPU_ONNXRUNTIME_CONFIGS = [
+    "onnxruntime_static_quant_vit.yaml",
+    "onnxruntime_timm.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cpu and openvino"
+CPU_OPENVINO_CONFIGS = [
+    "openvino_static_quant_bert.yaml",
+    "openvino_diffusion.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cpu and txi"
+CPU_PY_TXI_CONFIGS = [
+    "tei_bge.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cuda and pytorch"
+CUDA_PYTORCH_CONFIGS = [
+    "pytorch_bert.yaml",
+    "pytorch_gpt2.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cuda and txi"
+CUDA_PY_TXI_CONFIGS = [
+    "tgi_llama.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cuda and tensorrt_llm"
+CUDA_TENSORRT_LLM_CONFIGS = [
+    # "trt_llama.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cuda and vllm"
+CUDA_VLLM_CONFIGS = [
+    "vllm_llama.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "mps and llama_cpp"
+MPS_LLAMA_CPP_CONFIGS = [
+    "llama_cpp_embedding.yaml",
+    "llama_cpp_text_generation.yaml",
+]
+
+# can be run with pytest tests/test_examples.py -s -k "cuda and pytorch"
+CUDA_PYTORCH_SCRIPTS = [
+    "pytorch_bert.py",
+    "pytorch_gpt2.py",
+]
+
+# Those tests are not run on the CI/CD pipeline as they are currently broken
+UNTESTED_YAML_CONFIGS = [
+    "energy_star.yaml",
+    "trt_llama.yaml",
+    "pytorch_bert_mps.yaml",
+]
+
+ALL_YAML_CONFIGS = (
+    CUDA_PYTORCH_CONFIGS
+    + CPU_IPEX_CONFIGS
+    + MPS_LLAMA_CPP_CONFIGS
+    + CPU_NEURAL_COMPRESSOR_CONFIGS
+    + CPU_ONNXRUNTIME_CONFIGS
+    + CPU_OPENVINO_CONFIGS
+    + CPU_PY_TXI_CONFIGS
+    + CUDA_PY_TXI_CONFIGS
+    + CUDA_TENSORRT_LLM_CONFIGS
+    + CUDA_VLLM_CONFIGS
+    + UNTESTED_YAML_CONFIGS
+)
+
+ALL_PYTHON_SCRIPTS = CUDA_PYTORCH_SCRIPTS
+
+assert set(ALL_YAML_CONFIGS) == set(YAML_CONFIGS), (
+    f"Please add your new example config to the list of configs in test_examples.py for it to be integrated in the CI/CD pipeline.\n"
+    f"Difference between ALL_YAML_CONFIGS and YAML_CONFIGS:\n"
+    f"In ALL_YAML_CONFIGS but not in YAML_CONFIGS: {set(ALL_YAML_CONFIGS) - set(YAML_CONFIGS)}\n"
+    f"In YAML_CONFIGS but not in ALL_YAML_CONFIGS: {set(YAML_CONFIGS) - set(ALL_YAML_CONFIGS)}"
+)
+
+assert set(PYTHON_SCRIPTS) == set(ALL_PYTHON_SCRIPTS), (
+    f"Please add your new example script to the list of scripts in test_examples.py for it to be integrated in the CI/CD pipeline.\n"
+    f"Difference between PYTHON_SCRIPTS and ALL_PYTHON_SCRIPTS:\n"
+    f"In PYTHON_SCRIPTS but not in ALL_PYTHON_SCRIPTS: {set(PYTHON_SCRIPTS) - set(ALL_PYTHON_SCRIPTS)}\n"
+    f"In ALL_PYTHON_SCRIPTS but not in PYTHON_SCRIPTS: {set(ALL_PYTHON_SCRIPTS) - set(PYTHON_SCRIPTS)}"
+)
+
+
+def extract_name_from_yaml(config_name):
+    config_path = EXAMPLES_DIR / config_name
+
+    with open(config_path, "r") as f:
+        yaml_content = f.read()
+
+    data = yaml.safe_load(yaml_content)
+
+    return data.get("name")
+
+
+def test_yaml_config(config_name):
+    name = extract_name_from_yaml(config_name)
+
+    args = [
+        "optimum-benchmark",
+        "--config-dir",
+        str(EXAMPLES_DIR),
+        "--config-name",
+        config_name.split(".")[0],
+    ]
+
+    popen = run_subprocess_and_log_stream_output(LOGGER, args)
+    assert popen.returncode == 0, f"Failed to run {config_name}"
+
+    # Check if the benchmark produced any output
+    output_dir = Path(OUTPUT_DIR) / name
+    assert output_dir.exists(), f"No output directory found for {config_name}"
+
+    # Check if there's at least one file in the output directory
+    output_files = list(output_dir.glob("*"))
+    assert len(output_files) > 0, f"No output files found for {config_name}"
+
+
+def execute_python_script(script_name):
+    script_path = EXAMPLES_DIR / script_name
+    # Run the example file as a separate process
+    process = subprocess.Popen(
+        [sys.executable, str(script_path)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+    )
+
+    # Capture and display output in real-time
+    while True:
+        output = process.stdout.readline()
+        if output == "" and process.poll() is not None:
+            break
+        if output:
+            print(output.strip())
+            sys.stdout.flush()
+
+    # Capture any remaining output
+    stdout, stderr = process.communicate()
+
+    # Create a result object similar to subprocess.run
+    result = subprocess.CompletedProcess(
+        args=[sys.executable, str(script_path)], returncode=process.returncode, stdout=stdout, stderr=stderr
+    )
+
+    # Check that the process completed successfully (return code 0)
+    assert result.returncode == 0, f"Script {script_path} failed with error:\n{result.stderr}"
+
+    # Check that there's no error output
+    assert not result.stderr, f"Script {script_path} produced error output:\n{result.stderr}"
+
+
+@pytest.mark.parametrize("config_name", CUDA_PYTORCH_CONFIGS)
+def test_cuda_pytorch_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CPU_IPEX_CONFIGS)
+def test_cpu_ipex_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", MPS_LLAMA_CPP_CONFIGS)
+def test_mps_llama_cpp_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CPU_NEURAL_COMPRESSOR_CONFIGS)
+def test_cpu_neural_compressor_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CPU_ONNXRUNTIME_CONFIGS)
+def test_cpu_onnxruntime_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CPU_OPENVINO_CONFIGS)
+def test_cpu_openvino_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CPU_PY_TXI_CONFIGS)
+def test_cpu_py_txi_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CUDA_PY_TXI_CONFIGS)
+def test_cuda_py_txi_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CUDA_TENSORRT_LLM_CONFIGS)
+def test_cuda_tensorrt_llm_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("config_name", CUDA_VLLM_CONFIGS)
+def test_cuda_vllm_configs(config_name):
+    test_yaml_config(config_name)
+
+
+@pytest.mark.parametrize("script_name", CUDA_PYTORCH_SCRIPTS)
+def test_cuda_pytorch_scripts(script_name):
+    execute_python_script(script_name)