From 30d54bf0f25b94f5f6a68291e4b60e84e90eeefe Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Thu, 3 Oct 2024 13:38:59 -0400 Subject: [PATCH] CPU feature logging Signed-off-by: Benoit Jacob --- build_tools/bazel/iree_check_test.bzl | 24 +-- .../bazel/iree_e2e_generated_runner_test.bzl | 23 +-- .../bazel_to_cmake_converter.py | 5 - build_tools/cmake/iree_check_test.cmake | 48 ++--- .../iree_e2e_generated_runner_test.cmake | 40 ++-- .../cmake/iree_hal_cts_test_suite.cmake | 3 + .../cmake/iree_static_linker_test.cmake | 2 + compiler/plugins/target/LLVMCPU/BUILD.bazel | 16 ++ .../plugins/target/LLVMCPU/CMakeLists.txt | 15 ++ .../target/LLVMCPU/LLVMTargetOptions.cpp | 144 ++++--------- .../target/LLVMCPU/LLVMTargetOptions.h | 26 +-- .../LLVMCPU/ResolveCPUAndCPUFeatures.cpp | 191 ++++++++++++++++++ .../target/LLVMCPU/ResolveCPUAndCPUFeatures.h | 33 +++ samples/simple_embedding/BUILD.bazel | 3 + samples/simple_embedding/CMakeLists.txt | 3 + samples/static_library/CMakeLists.txt | 2 + tests/e2e/attention/CMakeLists.txt | 6 - tests/e2e/convolution/BUILD.bazel | 2 - tests/e2e/convolution/CMakeLists.txt | 24 --- tests/e2e/linalg/BUILD.bazel | 2 + tests/e2e/linalg/CMakeLists.txt | 3 + tests/e2e/linalg_ext_ops/BUILD.bazel | 1 + tests/e2e/linalg_ext_ops/CMakeLists.txt | 2 + tests/e2e/matmul/BUILD.bazel | 6 +- tests/e2e/matmul/CMakeLists.txt | 48 ++--- tests/e2e/regression/BUILD.bazel | 9 +- tests/e2e/regression/CMakeLists.txt | 9 +- tests/e2e/stablehlo_ops/BUILD.bazel | 3 +- tests/e2e/stablehlo_ops/CMakeLists.txt | 3 +- tests/e2e/tensor_ops/BUILD.bazel | 1 + tests/e2e/tensor_ops/CMakeLists.txt | 2 + tests/e2e/tosa_ops/BUILD.bazel | 1 + tests/e2e/tosa_ops/CMakeLists.txt | 2 + 33 files changed, 433 insertions(+), 269 deletions(-) create mode 100644 compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.cpp create mode 100644 compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h diff --git a/build_tools/bazel/iree_check_test.bzl b/build_tools/bazel/iree_check_test.bzl 
index e76a8fa0f415..20a615f3c233 100644 --- a/build_tools/bazel/iree_check_test.bzl +++ b/build_tools/bazel/iree_check_test.bzl @@ -24,7 +24,6 @@ def iree_check_test( input_type = None, runner_args = [], tags = [], - target_cpu_features = None, timeout = None, **kwargs): """Creates an iree-check-module test for the specified source file. @@ -43,19 +42,16 @@ def iree_check_test( driver and input file are passed automatically. tags: additional tags to apply to the generated test. Tag "driver=DRIVER" and "target=TARGET" are added automatically. - target_cpu_features: currently unimplemented (must be empty), will - eventually allow specifying target CPU features. timeout: timeout for the generated tests. **kwargs: any additional attributes to pass to the underlying native_test. """ - if target_cpu_features: - fail("target_cpu_features must currently be empty") input_type_flags = [] if input_type: input_type_flags = ["--iree-input-type=%s" % input_type] flags = [ "--iree-hal-target-backends=%s" % target_backend, + "--iree-hal-target-cpu=generic", ] + compiler_flags + input_type_flags bytecode_module_name = name + "_bytecode_module" iree_bytecode_module( @@ -91,7 +87,6 @@ def iree_check_single_backend_test_suite( input_type = None, runner_args = [], tags = [], - target_cpu_features = None, timeout = None, **kwargs): """Creates a test suite of iree-check-module tests for a single backend/driver pair. @@ -112,8 +107,6 @@ def iree_check_single_backend_test_suite( iree-check-module tests. The driver and input file are passed automatically. To use different runner_args per test, create a separate suite or iree_check_test. - target_cpu_features: currently unimplemented (must be empty), will - eventually allow specifying target CPU features. tags: tags to apply to the generated tests. Note that as in standard test suites, manual is treated specially and will also apply to the test suite itself. 
@@ -130,13 +123,6 @@ def iree_check_single_backend_test_suite( if target_backend == "rocm" or driver == "hip": return - # We haven't implemented this so far because we have been using target_cpu_features so far only - # for aarch64 targets, for which we use the CMake build. To future people implementing this: - # target_cpu_features should be a list, and here it should be joined into a comma-separated - # string to be passed to --iree-llvmcpu-target-cpu-features - if target_cpu_features: - fail("target_cpu_features must currently be empty") - tests = [] for src in srcs: test_name = "_".join([name, src]) @@ -200,10 +186,10 @@ def iree_check_test_suite( Currently unimplemented in Bazel due to difficulty of specializing to target architecture in Bazel. The following describes the semantics that this should have if implemented. Each - entry is either "default" for the architecture defaults, or a colon- - separated triple "arch:name:cpu_features" where "arch" filters - for a target CPU architecture (in IREE_ARCH format), "name" is a - short name for the CPU features set (used to generate target names) + entry is either "generic" for the architecture defaults, or "host" + for the host CPU, or a colon-separated triple "arch:name:cpu_features" + where "arch" filters for a target CPU architecture (in IREE_ARCH format), + "name" is a short name for the CPU features set (used to generate target names) and cpu_features is a comma-separated list of LLVM target attributes to enable. 
Example: x86_64:avx2_fma:+avx,+avx2,+fma diff --git a/build_tools/bazel/iree_e2e_generated_runner_test.bzl b/build_tools/bazel/iree_e2e_generated_runner_test.bzl index 9d17d50808d0..e5843a324ec4 100644 --- a/build_tools/bazel/iree_e2e_generated_runner_test.bzl +++ b/build_tools/bazel/iree_e2e_generated_runner_test.bzl @@ -22,7 +22,6 @@ def iree_e2e_runner_test( compiler_flags = [], runner_args = [], tags = [], - target_cpu_features = None, timeout = None, **kwargs): """Creates a test using a specified test runner program. @@ -44,23 +43,18 @@ def iree_e2e_runner_test( added automatically. test_runner: test runner program to run. timeout: timeout for the generated tests. - target_cpu_features: target CPU features. Only for llvm-cpu backend. **kwargs: any additional attributes to pass to the underlying tests and test suite. """ - if target_cpu_features: - fail("target_cpu_features must currently be empty") - iree_bytecode_module( name = name + "_%s_module" % test_type, module_name = tests_vmfb, src = tests_src, flags = [ "--iree-hal-target-backends=%s" % target_backend, - ] + ([ - "--iree-llvmcpu-target-cpu-features=%s" % target_cpu_features, - ] if target_cpu_features else []) + compiler_flags, + "--iree-hal-target-cpu=generic", + ] + compiler_flags, visibility = ["//visibility:private"], testonly = True, **kwargs @@ -106,7 +100,6 @@ def iree_single_backend_e2e_runner_test( compiler_flags = [], runner_args = [], tags = [], - target_cpu_features = None, timeout = None, **kwargs): """Generates an iree_e2e_runner_test using a custom python generator script. @@ -133,7 +126,6 @@ def iree_single_backend_e2e_runner_test( added automatically. test_runner: test runner program to run. timeout: timeout for the generated tests. - target_cpu_features: target CPU features. Only for llvm-cpu backend. **kwargs: any additional attributes to pass to the underlying tests and test suite. 
""" @@ -171,7 +163,6 @@ def iree_single_backend_e2e_runner_test( runner_args = runner_args, tags = tags, timeout = timeout, - target_cpu_features = target_cpu_features, **kwargs ) @@ -213,13 +204,13 @@ def iree_generated_e2e_runner_test( Currently unimplemented in Bazel due to difficulty of specializing to target architecture in Bazel. The following describes the semantics that this should have if implemented. Each - entry is either "default" for the architecture defaults, or a colon- - separated triple "arch:name:cpu_features" where "arch" filters - for a target CPU architecture (in IREE_ARCH format), "name" is a - short name for the CPU features set (used to generate target names) + entry is either "generic" for the architecture defaults, or "host" + for the host CPU, or a colon-separated triple "arch:name:cpu_features" + where "arch" filters for a target CPU architecture (in IREE_ARCH format), + "name" is a short name for the CPU features set (used to generate target names) and cpu_features is a comma-separated list of LLVM target attributes to enable. Example: - x86_64:avx2_fma:+avx,+avx2,+fma + x86_64:avx2_fma:+avx,+avx2,+fma **kwargs: any additional attributes to pass to the underlying tests and test suite. 
""" diff --git a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py index eb5d2b1ddd67..4f8084f14ff8 100644 --- a/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py +++ b/build_tools/bazel_to_cmake/bazel_to_cmake_converter.py @@ -769,7 +769,6 @@ def iree_check_single_backend_test_suite( target_backends_and_drivers=None, runner_args=None, tags=None, - target_cpu_features=None, timeout=None, **kwargs, ): @@ -787,9 +786,6 @@ def iree_check_single_backend_test_suite( input_type_block = self._convert_string_arg_block("INPUT_TYPE", input_type) runner_args_block = self._convert_string_list_block("RUNNER_ARGS", runner_args) labels_block = self._convert_string_list_block("LABELS", tags) - target_cpu_features_block = self._convert_string_arg_block( - "TARGET_CPU_FEATURES", target_cpu_features - ) timeout_block = self._convert_timeout_arg_block("TIMEOUT", timeout) self._converter.body += ( @@ -802,7 +798,6 @@ def iree_check_single_backend_test_suite( f"{input_type_block}" f"{runner_args_block}" f"{labels_block}" - f"{target_cpu_features_block}" f"{timeout_block}" f")\n\n" ) diff --git a/build_tools/cmake/iree_check_test.cmake b/build_tools/cmake/iree_check_test.cmake index cb6d32293d07..b00741c1905c 100644 --- a/build_tools/cmake/iree_check_test.cmake +++ b/build_tools/cmake/iree_check_test.cmake @@ -39,8 +39,6 @@ endfunction() # "driver=${DRIVER}" are added automatically. # MODULE_FILE_NAME: Optional, specifies the absolute path to the filename # to use for the generated IREE module (.vmfb). -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. # DEPENDS: Optional. Additional dependencies beyond SRC and the tools. # INPUT_TYPE: The value for the --iree-input-type= flag. Also disables tests # if no compiled support for that configuration. 
@@ -53,7 +51,7 @@ function(iree_check_test) _RULE "" "NAME;SRC;TARGET_BACKEND;DRIVER;MODULE_FILE_NAME;INPUT_TYPE" - "COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES;DEPENDS;TIMEOUT" + "COMPILER_FLAGS;RUNNER_ARGS;LABELS;DEPENDS;TIMEOUT" ${ARGN} ) @@ -162,9 +160,6 @@ function(iree_check_test) if(_RULE_INPUT_TYPE) list(APPEND _BASE_COMPILER_FLAGS "--iree-input-type=${_RULE_INPUT_TYPE}") endif() - if(_RULE_TARGET_CPU_FEATURES) - list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}") - endif() if(_NORMALIZED_TARGET_BACKEND STREQUAL "ROCM") list(APPEND _BASE_COMPILER_FLAGS "--iree-hip-target=${IREE_HIP_TEST_TARGET_CHIP}") endif() @@ -242,8 +237,6 @@ endfunction() # different args per test, create a separate suite or iree_check_test. # LABELS: Additional labels to apply to the generated tests. The package path # is added automatically. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. # DEPENDS: Optional. Additional dependencies beyond SRC and the tools. # INPUT_TYPE: The value for the --iree-input-type= flag. Also disables tests # if no compiled support for that configuration. 
@@ -256,7 +249,7 @@ function(iree_check_single_backend_test_suite) _RULE "" "NAME;TARGET_BACKEND;DRIVER;INPUT_TYPE" - "SRCS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES;DEPENDS;TIMEOUT" + "SRCS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;DEPENDS;TIMEOUT" ${ARGN} ) @@ -284,7 +277,6 @@ function(iree_check_single_backend_test_suite) INPUT_TYPE ${_RULE_INPUT_TYPE} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} DEPENDS ${_RULE_DEPENDS} TIMEOUT ${_RULE_TIMEOUT} ) @@ -309,7 +301,6 @@ function(iree_check_single_backend_test_suite) INPUT_TYPE ${_RULE_INPUT_TYPE} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} DEPENDS ${_RULE_DEPENDS} TIMEOUT ${_RULE_TIMEOUT} ) @@ -328,7 +319,6 @@ function(iree_check_single_backend_test_suite) INPUT_TYPE ${_RULE_INPUT_TYPE} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} DEPENDS ${_RULE_DEPENDS} TIMEOUT ${_RULE_TIMEOUT} ) @@ -342,9 +332,7 @@ endfunction() # This function has 3 output-params: variables that it sets with PARENT_SCOPE: # _ENABLED, _FEATURES_NAME, _FEATURES. # -# "default" is handled specially. _ENABLED is always set to "TRUE" and -# _FEATURES_NAME and _FEATURES are set to -# the empty string. +# "generic" and "host" are handled specially, as CPU names. # # Other values are parsed as "arch:features_name:features". The `arch` # component is matched with `IREE_ARCH`, `_ENABLED` is set to "TRUE" if and @@ -354,23 +342,28 @@ endfunction() # # Examples: # -# default: -# _ENABLED="TRUE" unconditionally, -# other output strings are "". +# generic: +# _ENABLED="TRUE" unconditionally, _FEATURES_NAME="generic", _FEATURES="". +# +# host: +# _ENABLED="TRUE" unconditionally, _FEATURES_NAME="host", _FEATURES="". # # aarch64:dotprod:+dotprod: # _ENABLED="TRUE" if the target architecture is aarch64, and in that case: # _FEATURES_NAME="dotprod". # _FEATURES="+dotprod". 
function(parse_target_cpu_features_variant _VARIANT_STRING _ENABLED_VAR - _FEATURES_NAME_VAR _FEATURES_VAR) + _FEATURES_NAME_VAR _COMPILER_FLAGS_VAR) set("${_ENABLED_VAR}" FALSE PARENT_SCOPE) set("${_FEATURES_NAME_VAR}" "" PARENT_SCOPE) - set("${_FEATURES_VAR}" "" PARENT_SCOPE) - if("${_VARIANT_STRING}" STREQUAL "default") + set("${_COMPILER_FLAGS_VAR}" "" PARENT_SCOPE) + if("${_VARIANT_STRING}" STREQUAL "generic" OR "${_VARIANT_STRING}" STREQUAL "host") set("${_ENABLED_VAR}" TRUE PARENT_SCOPE) + set("${_FEATURES_NAME_VAR}" "${_VARIANT_STRING}" PARENT_SCOPE) + set("${_COMPILER_FLAGS_VAR}" "--iree-llvmcpu-target-cpu=${_VARIANT_STRING}" PARENT_SCOPE) return() endif() + # Interpret _VARIANT_STRING as a CMake list (;-separated). string(REPLACE ":" ";" _COMPONENTS "${_VARIANT_STRING}") list(LENGTH _COMPONENTS _NUM_COMPONENTS) @@ -385,7 +378,7 @@ function(parse_target_cpu_features_variant _VARIANT_STRING _ENABLED_VAR if(_FILTER_ARCH STREQUAL IREE_ARCH) set("${_ENABLED_VAR}" TRUE PARENT_SCOPE) set("${_FEATURES_NAME_VAR}" "${_FEATURES_NAME}" PARENT_SCOPE) - set("${_FEATURES_VAR}" "${_FEATURES}" PARENT_SCOPE) + set("${_COMPILER_FLAGS_VAR}" "--iree-llvmcpu-target-cpu-features=${_FEATURES}" PARENT_SCOPE) endif() endfunction() @@ -413,8 +406,8 @@ endfunction() # LABELS: Additional labels to apply to the generated tests. The package path is # added automatically. # TARGET_CPU_FEATURES_VARIANTS: list of target cpu features variants. 
Each -# entry is either "default" for the architecture defaults, or a colon- -# separated triple "arch:name:cpu_features" where "arch" filters +# entry is either "generic" for the architecture defaults, or "host" for +# the host CPU, or a colon-separated triple "arch:name:cpu_features" where "arch" filters # for a target CPU architecture (in IREE_ARCH format), "name" is a # short name for the CPU features set (used to generate target names) # and cpu_features is a comma-separated list of LLVM target attributes @@ -443,7 +436,7 @@ function(iree_check_test_suite) if(_RULE_TARGET_CPU_FEATURES_VARIANTS) set(_TARGET_CPU_FEATURES_VARIANTS "${_RULE_TARGET_CPU_FEATURES_VARIANTS}") else() - set(_TARGET_CPU_FEATURES_VARIANTS "default") + set(_TARGET_CPU_FEATURES_VARIANTS "generic") endif() if(NOT DEFINED _RULE_TARGET_BACKENDS AND NOT DEFINED _RULE_DRIVERS) @@ -466,7 +459,7 @@ function(iree_check_test_suite) list(GET _RULE_DRIVERS ${_INDEX} _DRIVER) foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS) parse_target_cpu_features_variant("${_VARIANT_STRING}" - _ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES) + _ENABLED _TARGET_CPU_FEATURES_NAME _VARIANT_COMPILER_FLAGS) if(NOT _ENABLED) # The current entry is disabled on the target CPU architecture. continue() @@ -488,12 +481,11 @@ function(iree_check_test_suite) ${_DRIVER} COMPILER_FLAGS ${_RULE_COMPILER_FLAGS} + ${_VARIANT_COMPILER_FLAGS} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_LABELS} - TARGET_CPU_FEATURES - ${_TARGET_CPU_FEATURES} TIMEOUT ${_RULE_TIMEOUT} INPUT_TYPE diff --git a/build_tools/cmake/iree_e2e_generated_runner_test.cmake b/build_tools/cmake/iree_e2e_generated_runner_test.cmake index 585d9906f112..658e7d8d896a 100644 --- a/build_tools/cmake/iree_e2e_generated_runner_test.cmake +++ b/build_tools/cmake/iree_e2e_generated_runner_test.cmake @@ -29,8 +29,6 @@ include(CMakeParseArguments) # LABELS: Additional labels to apply to the test. 
The package path and # "driver=${DRIVER}" are added automatically. # TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. # TEST_DEFINED: Whether to define a test target. # TEST_DISABLED: The test target will be skipped and its status will be # 'Not Run'. @@ -48,7 +46,7 @@ function(iree_e2e_runner_test) _RULE "" "NAME;TEST_TYPE;VARIANT_NAME;TESTS_SRC;TESTS_VMFB;CALLS_SRC;CALLS_VMFB;TRACE;TARGET_BACKEND;DRIVER;TEST_RUNNER;TEST_DEFINED;TEST_DISABLED" - "COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES" + "COMPILER_FLAGS;RUNNER_ARGS;LABELS" ${ARGN} ) @@ -63,9 +61,6 @@ function(iree_e2e_runner_test) set(_BASE_COMPILER_FLAGS "--iree-hal-target-backends=${_RULE_TARGET_BACKEND}" ) - if (_RULE_TARGET_CPU_FEATURES) - list(APPEND _BASE_COMPILER_FLAGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}") - endif() if(NOT TARGET "${_NAME}_${_RULE_TEST_TYPE}_module") iree_bytecode_module( @@ -140,7 +135,7 @@ endfunction() # invoked with the following standard flags, in addition to GENERATOR_ARGS: # --output_${TEST_TYPE}_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_${TEST_TYPE}.mlir # --output_calls_mlir=${CMAKE_CURRENT_BINARY_DIR}/name_calls.mlir -# and if TARGET_CPU_FEATURES is not empty: +# and if COMPILER_FLAGS contains "--iree-llvmcpu-target-cpu-features=${TARGET_CPU_FEATURES}": # --requirements=${TARGET_CPU_FEATURES} # GENERATOR_ARGS: additional args to pass to the generator program. # TARGET_BACKEND: target backend to compile for. @@ -152,8 +147,6 @@ endfunction() # LABELS: Additional labels to apply to the test. The package path and # "driver=${DRIVER}" are added automatically. # TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES: If specified, a string passed as argument to -# --iree-llvmcpu-target-cpu-features. 
function(iree_single_backend_e2e_runner_test) if(NOT IREE_BUILD_TESTS) return() @@ -163,12 +156,11 @@ function(iree_single_backend_e2e_runner_test) if(NOT IREE_BUILD_COMPILER AND NOT IREE_HOST_BIN_DIR) return() endif() - cmake_parse_arguments( _RULE "" "NAME;TEST_TYPE;GENERATOR;TARGET_BACKEND;DRIVER;TEST_RUNNER" - "GENERATOR_ARGS;COMPILER_FLAGS;RUNNER_ARGS;LABELS;TARGET_CPU_FEATURES" + "GENERATOR_ARGS;COMPILER_FLAGS;RUNNER_ARGS;LABELS" ${ARGN} ) @@ -251,9 +243,13 @@ function(iree_single_backend_e2e_runner_test) list(APPEND _GENERATOR_STANDARD_FLAGS "--output_${_RULE_TEST_TYPE}_mlir=${_TESTS_SRC}") list(APPEND _GENERATOR_STANDARD_FLAGS "--output_calls_mlir=${_CALLS_SRC}") - if(_RULE_TARGET_CPU_FEATURES) - list(APPEND _GENERATOR_STANDARD_FLAGS "--requirements=${_RULE_TARGET_CPU_FEATURES}") - endif() + foreach(_COMPILER_FLAG IN LISTS _RULE_COMPILER_FLAGS) + set(_CPU_FEATURES_REGEX "^--iree-llvmcpu-target-cpu-features=") + if (_COMPILER_FLAG MATCHES "${_CPU_FEATURES_REGEX}") + string(REGEX REPLACE "${_CPU_FEATURES_REGEX}" "" _CPU_FEATURES "${_COMPILER_FLAG}") + list(APPEND _GENERATOR_STANDARD_FLAGS "--requirements=${_CPU_FEATURES}") + endif() + endforeach() if(NOT _BYTECODE_MODULE_BUILD_ENABLED) return() @@ -304,7 +300,6 @@ function(iree_single_backend_e2e_runner_test) COMPILER_FLAGS ${_RULE_COMPILER_FLAGS} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} TEST_DEFINED ${_TEST_DEFINED} TEST_DISABLED ${_TEST_DISABLED} ) @@ -338,7 +333,6 @@ function(iree_single_backend_e2e_runner_test) COMPILER_FLAGS ${_ASAN_COMPILER_FLAGS} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} TEST_DEFINED ${_TEST_DEFINED} TEST_DISABLED ${_TEST_DISABLED} ) @@ -365,7 +359,6 @@ function(iree_single_backend_e2e_runner_test) COMPILER_FLAGS ${_TSAN_COMPILER_FLAGS} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_RULE_LABELS} - TARGET_CPU_FEATURES ${_RULE_TARGET_CPU_FEATURES} TEST_DEFINED 
${_TEST_DEFINED} TEST_DISABLED ${_TEST_DISABLED} ) @@ -410,9 +403,9 @@ endfunction() # LABELS: Additional labels to apply to the test. The package path and # "driver=${DRIVER}" are added automatically. # TEST_RUNNER: trace-runner program to run. -# TARGET_CPU_FEATURES_VARIANTS:list of target cpu features variants. Each -# entry is either "default" for the architecture defaults, or a colon- -# separated triple "arch:name:cpu_features" where "arch" filters +# TARGET_CPU_FEATURES_VARIANTS: list of target cpu features variants. Each +# entry is either "generic" for the architecture defaults, or "host" for +# the host CPU, or a colon-separated triple "arch:name:cpu_features" where "arch" filters # for a target CPU architecture (in IREE_ARCH format), "name" is a # short name for the CPU features set (used to generate target names) # and cpu_features is a comma-separated list of LLVM target attributes @@ -439,7 +432,7 @@ function(iree_generated_e2e_runner_test) if(_RULE_TARGET_CPU_FEATURES_VARIANTS) set(_TARGET_CPU_FEATURES_VARIANTS "${_RULE_TARGET_CPU_FEATURES_VARIANTS}") else() - set(_TARGET_CPU_FEATURES_VARIANTS "default") + set(_TARGET_CPU_FEATURES_VARIANTS "generic") endif() @@ -462,7 +455,7 @@ function(iree_generated_e2e_runner_test) list(GET _RULE_DRIVERS ${_INDEX} _DRIVER) foreach(_VARIANT_STRING IN LISTS _TARGET_CPU_FEATURES_VARIANTS) parse_target_cpu_features_variant("${_VARIANT_STRING}" - _ENABLED _TARGET_CPU_FEATURES_NAME _TARGET_CPU_FEATURES) + _ENABLED _TARGET_CPU_FEATURES_NAME _VARIANT_COMPILER_FLAGS) if(NOT _ENABLED) # The current entry is disabled on the target CPU architecture. 
continue() @@ -490,12 +483,11 @@ function(iree_generated_e2e_runner_test) ${_DRIVER} COMPILER_FLAGS ${_RULE_COMPILER_FLAGS} + ${_VARIANT_COMPILER_FLAGS} RUNNER_ARGS ${_RULE_RUNNER_ARGS} LABELS ${_LABELS} - TARGET_CPU_FEATURES - ${_TARGET_CPU_FEATURES} ) endforeach() endforeach() diff --git a/build_tools/cmake/iree_hal_cts_test_suite.cmake b/build_tools/cmake/iree_hal_cts_test_suite.cmake index ce229cbab1a4..fbb596867628 100644 --- a/build_tools/cmake/iree_hal_cts_test_suite.cmake +++ b/build_tools/cmake/iree_hal_cts_test_suite.cmake @@ -87,6 +87,9 @@ function(iree_hal_cts_test_suite) "--iree-hal-target-backends=${_RULE_COMPILER_TARGET_BACKEND}" ${_RULE_COMPILER_FLAGS} ) + if(_RULE_COMPILER_TARGET_BACKEND STREQUAL "llvm-cpu") + list(APPEND _TRANSLATE_FLAGS "--iree-llvmcpu-target-cpu=generic") + endif() # Skip if already created (multiple suites using the same compiler setting). iree_package_name(_PACKAGE_NAME) diff --git a/build_tools/cmake/iree_static_linker_test.cmake b/build_tools/cmake/iree_static_linker_test.cmake index 5db08d457cd2..09db2eb0701e 100644 --- a/build_tools/cmake/iree_static_linker_test.cmake +++ b/build_tools/cmake/iree_static_linker_test.cmake @@ -83,6 +83,8 @@ function(iree_static_linker_test) list(APPEND _COMPILER_ARGS "--iree-hal-target-backends=llvm-cpu") if(_RULE_TARGET_CPU_FEATURES) list(APPEND _COMPILER_ARGS "--iree-llvmcpu-target-cpu-features=${_RULE_TARGET_CPU_FEATURES}") + else() + list(APPEND _COMPILER_ARGS "--iree-llvmcpu-target-cpu=generic") endif() if(_RULE_EMITC) diff --git a/compiler/plugins/target/LLVMCPU/BUILD.bazel b/compiler/plugins/target/LLVMCPU/BUILD.bazel index 0e765fc6a417..c086f548118e 100644 --- a/compiler/plugins/target/LLVMCPU/BUILD.bazel +++ b/compiler/plugins/target/LLVMCPU/BUILD.bazel @@ -100,6 +100,21 @@ iree_compiler_cc_library( ], ) +iree_compiler_cc_library( + name = "ResolveCPUAndCPUFeatures", + srcs = [ + "ResolveCPUAndCPUFeatures.cpp", + ], + hdrs = [ + "ResolveCPUAndCPUFeatures.h", + ], + deps = [ + 
"@llvm-project//llvm:Support", + "@llvm-project//llvm:Target", + "@llvm-project//llvm:TargetParser", + ], +) + iree_compiler_cc_library( name = "LLVMTargetOptions", srcs = [ @@ -109,6 +124,7 @@ iree_compiler_cc_library( "LLVMTargetOptions.h", ], deps = [ + ":ResolveCPUAndCPUFeatures", "//compiler/src/iree/compiler/Utils", "@llvm-project//llvm:Analysis", "@llvm-project//llvm:Core", diff --git a/compiler/plugins/target/LLVMCPU/CMakeLists.txt b/compiler/plugins/target/LLVMCPU/CMakeLists.txt index 5d372d01d355..fea4eb84b118 100644 --- a/compiler/plugins/target/LLVMCPU/CMakeLists.txt +++ b/compiler/plugins/target/LLVMCPU/CMakeLists.txt @@ -89,6 +89,20 @@ iree_cc_library( PUBLIC ) +iree_cc_library( + NAME + ResolveCPUAndCPUFeatures + HDRS + "ResolveCPUAndCPUFeatures.h" + SRCS + "ResolveCPUAndCPUFeatures.cpp" + DEPS + LLVMSupport + LLVMTarget + LLVMTargetParser + PUBLIC +) + iree_cc_library( NAME LLVMTargetOptions @@ -97,6 +111,7 @@ iree_cc_library( SRCS "LLVMTargetOptions.cpp" DEPS + ::ResolveCPUAndCPUFeatures LLVMAnalysis LLVMCore LLVMMC diff --git a/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.cpp b/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.cpp index a9da1c9b3f26..80801b8cd628 100644 --- a/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.cpp +++ b/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.cpp @@ -6,8 +6,7 @@ #include "compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h" -#include - +#include "compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -19,88 +18,11 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Host.h" -#include "llvm/TargetParser/RISCVTargetParser.h" -#include "llvm/TargetParser/SubtargetFeature.h" #include "llvm/TargetParser/Triple.h" -#include "llvm/TargetParser/X86TargetParser.h" #include "mlir/IR/Builders.h" namespace mlir::iree_compiler::IREE::HAL { 
-namespace { - -bool resolveCPUAndCPUFeatures(llvm::StringRef inCpu, - llvm::StringRef inCpuFeatures, - const llvm::Triple &triple, std::string &outCpu, - std::string &outCpuFeatures) { - // Resolve "host" - if (inCpu == "host" || inCpuFeatures == "host") { - // If either Cpu or CpuFeatures is "host", the other must be either also - // host or the default value. - bool isCpuHostOrDefault = - inCpu.empty() || inCpu == "host" || inCpu == "generic"; - bool isCpuFeaturesHostOrDefault = - inCpuFeatures.empty() || inCpuFeatures == "host"; - if (!(isCpuHostOrDefault && isCpuFeaturesHostOrDefault)) { - llvm::errs() - << "error: If either cpu or CpuFeatures is `host`, the other must " - "be either also `host` or the default value\n"; - return false; - } - outCpu = triple.isX86() ? llvm::sys::getHostCPUName().str() : ""; - llvm::SubtargetFeatures features; - for (auto &feature : llvm::sys::getHostCPUFeatures()) { - features.AddFeature(feature.first(), feature.second); - } - outCpuFeatures = features.getString(); - } else { - outCpu = inCpu; - outCpuFeatures = inCpuFeatures; - } - - // Target-specific CPU feature tweaks that we need unconditionally. - if (triple.isAArch64()) { - llvm::SubtargetFeatures targetCpuFeatures(outCpuFeatures); - // x18 is platform-reserved per the Aarch64 procedure call specification. - targetCpuFeatures.AddFeature("reserve-x18", true); - outCpuFeatures = targetCpuFeatures.getString(); - } - - if (outCpu.empty() || inCpu == "host" || inCpu == "generic" || - inCpu.starts_with("generic-")) { - return true; - } - // If CPU is non-host and non-generic then we need to populate the - // corresponding features. 
- llvm::SubtargetFeatures targetCpuFeatures(outCpuFeatures); - auto addCpuFeatures = [&](const auto &getFeaturesForCPU, - auto &cpuFeatureList) { - getFeaturesForCPU(outCpu, cpuFeatureList, false); - for (const auto &feature : cpuFeatureList) { - targetCpuFeatures.AddFeature(feature); - } - }; - if (triple.isX86()) { - llvm::SmallVector cpuFeatureList; - addCpuFeatures(llvm::X86::getFeaturesForCPU, cpuFeatureList); - } else if (triple.isRISCV64()) { - llvm::SmallVector cpuFeatureList; - addCpuFeatures(llvm::RISCV::getFeaturesForCPU, cpuFeatureList); - } else { - llvm::errs() - << "error: Resolution of target CPU to target CPU features is not " - "implemented on " - "this target architecture. Pass explicit CPU features " - "instead of a CPU " - "on this architecture, or implement that.\n"; - return false; - } - outCpuFeatures = targetCpuFeatures.getString(); - return true; -} - -} // namespace - LLVMTarget::LLVMTarget() { // LLVM loop optimization options. pipelineTuningOptions.LoopInterleaving = DEFAULT_LOOP_INTERLEAVING; @@ -122,14 +44,15 @@ LLVMTarget::LLVMTarget() { llvmTargetOptions.UniqueSectionNames = true; } -std::optional LLVMTarget::create(std::string_view triple, - std::string_view cpu, - std::string_view cpuFeatures, - bool requestLinkEmbedded) { +std::optional +LLVMTarget::create(std::string_view triple, std::string_view cpu, + std::string_view cpuFeatures, bool requestLinkEmbedded, + ResolveCPUAndCPUFeaturesStatus &status) { LLVMTarget target; target.linkEmbedded = requestLinkEmbedded; - - target.triple = triple; + target.triple = (triple.empty() || triple == "host") + ? llvm::sys::getProcessTriple() + : triple; llvm::Triple targetTriple(target.triple); // Special casing if linkEmbedded. 
if (targetTriple.isWasm()) { @@ -142,23 +65,25 @@ std::optional LLVMTarget::create(std::string_view triple, targetTriple.setEnvironment(llvm::Triple::EnvironmentType::EABI); targetTriple.setOS(llvm::Triple::OSType::UnknownOS); targetTriple.setObjectFormat(llvm::Triple::ObjectFormatType::ELF); - target.triple = targetTriple.str(); - } - if (!resolveCPUAndCPUFeatures(cpu, cpuFeatures, llvm::Triple(triple), - target.cpu, target.cpuFeatures)) { - // Something bad happened, and our target might not be what the user expects - // but we need to continue to avoid breaking existing users. Hopefully - // resolveCPUAndCPUFeatures logged a helpful error already. } - + target.triple = targetTriple.str(); + target.cpu = cpu; + target.cpuFeatures = cpuFeatures; + status = resolveCPUAndCPUFeatures(triple, target.cpu, target.cpuFeatures); return target; } std::optional LLVMTarget::createForHost() { - auto target = - LLVMTarget::create(llvm::sys::getProcessTriple(), /*cpu=*/"host", - /*cpuFeatures=*/"host", - /*requestLinkEmbedded=*/true); + ResolveCPUAndCPUFeaturesStatus status; + auto triple = llvm::sys::getProcessTriple(); + auto target = LLVMTarget::create(triple, /*cpu=*/"host", + /*cpuFeatures=*/"host", + /*requestLinkEmbedded=*/true, status); + if (status != ResolveCPUAndCPUFeaturesStatus::OK) { + llvm::errs() << "Internal error while creating host target: " + << getMessage(status, triple) << "\n"; + assert(false); + } if (target) target->populateDefaultsFromTargetMachine(); return target; @@ -333,9 +258,18 @@ LLVMTarget::loadFromConfigAttr(Location loc, DictionaryAttr config, "accompanied by 'target_triple'"; return {}; } + ResolveCPUAndCPUFeaturesStatus status; std::optional maybeTarget = - LLVMTarget::create(*triple, cpu ? *cpu : "generic", - cpuFeatures ? 
*cpuFeatures : "", linkEmbedded); + LLVMTarget::create(*triple, cpu.value_or(""), cpuFeatures.value_or(""), + linkEmbedded, status); + if (status != ResolveCPUAndCPUFeaturesStatus::OK) { + // TODO(#18561): after people have had time to adapt, typically by adding + // --iree-llvmcpu-target-cpu=generic (or another value) to their invokes, + // promote this warning to an error by changing emitWarning to emitError + // and nulling maybeTarget. + emitWarning(loc) << "while creating CPU target: " + << getMessage(status, *triple); + } if (!maybeTarget) { return {}; } @@ -670,13 +604,19 @@ LLVMTargetOptions LLVMCPUTargetCLOptions::getTargetOptions() { targetTriple = llvm::sys::getProcessTriple(); } + ResolveCPUAndCPUFeaturesStatus status; std::optional maybeTarget = LLVMTarget::create( - targetTriple, targetCPU, targetCPUFeatures, linkEmbedded); + targetTriple, targetCPU, targetCPUFeatures, linkEmbedded, status); + (void)status; // Ignore status here, since this code runs at target backend + // registration time and we don't know if this backend will + // actually be used, and these error statuses are non-fatal, + // mostly warning about fallbacks. If the target backend is + // actually used, any error here will also trigger in + // loadFromConfigAttr, where we will generate an IR error. if (maybeTarget) { targetOptions.target = *maybeTarget; } else { - llvm::errs() << "Inconsistency in iree-llvmcpu-target-cpu-* command-line" - "flags. 
The target CPU is not properly defined.\n"; + llvm::errs() << "The target CPU is not properly defined.\n"; } LLVMTarget &target = targetOptions.target; target.linkStatic = linkStatic; diff --git a/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h b/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h index 3d232386f084..0459b2f8b4b7 100644 --- a/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h +++ b/compiler/plugins/target/LLVMCPU/LLVMTargetOptions.h @@ -9,6 +9,7 @@ #include +#include "compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h" #include "iree/compiler/Utils/OptionUtils.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/raw_ostream.h" @@ -66,10 +67,10 @@ struct LLVMTarget { void storeToConfigAttrs(MLIRContext *context, SmallVector &config) const; - static std::optional create(std::string_view triple, - std::string_view cpu, - std::string_view cpuFeatures, - bool requestLinkEmbedded); + static std::optional + create(std::string_view triple, std::string_view cpu, + std::string_view cpuFeatures, bool requestLinkEmbedded, + ResolveCPUAndCPUFeaturesStatus &status); static std::optional createForHost(); @@ -170,25 +171,26 @@ createTargetMachine(const LLVMTarget &target); // Parse into LLVMTargetOptions using getTargetOptions(). struct LLVMCPUTargetCLOptions { // Target invariant flags. - std::string systemLinkerPath = ""; - std::string embeddedLinkerPath = ""; - std::string wasmLinkerPath = ""; + std::string systemLinkerPath; + std::string embeddedLinkerPath; + std::string wasmLinkerPath; bool keepLinkerArtifacts = false; // Default device options. 
- std::string targetTriple = ""; - std::string targetCPU = "generic"; - std::string targetCPUFeatures = ""; + std::string targetTriple; + std::string targetCPU; + std::string loggingUnspecifiedTargetCPU; + std::string targetCPUFeatures; bool linkEmbedded = LLVMTarget::DEFAULT_LINK_EMBEDDED; bool linkStatic = LLVMTarget::DEFAULT_LINK_STATIC; - std::string staticLibraryOutputPath = ""; + std::string staticLibraryOutputPath; bool debugSymbols = LLVMTarget::DEFAULT_DEBUG_SYMBOLS; bool llvmLoopInterleaving = LLVMTarget::DEFAULT_LOOP_INTERLEAVING; bool llvmLoopVectorization = LLVMTarget::DEFAULT_LOOP_VECTORIZATION; bool llvmLoopUnrolling = LLVMTarget::DEFAULT_LOOP_UNROLLING; bool llvmSLPVectorization = LLVMTarget::DEFAULT_SLP_VECTORIZATION; SanitizerKind sanitizerKind = LLVMTarget::DEFAULT_SANITIZER_KIND; - std::string targetABI = ""; + std::string targetABI; llvm::FloatABI::ABIType targetFloatABI = LLVMTarget::DEFAULT_FLOAT_ABI; std::string targetDataLayout = LLVMTarget::DEFAULT_DATA_LAYOUT; unsigned targetVectorWidthInBytes = LLVMTarget::DEFAULT_VECTOR_WIDTH_IN_BYTES; diff --git a/compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.cpp b/compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.cpp new file mode 100644 index 000000000000..64779cf7d69d --- /dev/null +++ b/compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.cpp @@ -0,0 +1,191 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h" + +#include "llvm/TargetParser/Host.h" +#include "llvm/TargetParser/RISCVTargetParser.h" +#include "llvm/TargetParser/SubtargetFeature.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/TargetParser/X86TargetParser.h" + +namespace mlir::iree_compiler::IREE::HAL { + +namespace { + +ResolveCPUAndCPUFeaturesStatus +resolveHostCPUAndCPUFeatures(std::string &cpu, std::string &cpuFeatures) { + if (cpu != "host" && cpuFeatures != "host") { + return ResolveCPUAndCPUFeaturesStatus::OK; + } + if ((!cpu.empty() && cpu != "host") || + (!cpuFeatures.empty() && cpuFeatures != "host")) { + return ResolveCPUAndCPUFeaturesStatus::InconsistentHost; + } + cpu = llvm::sys::getHostCPUName(); + llvm::SubtargetFeatures features; + for (auto &feature : llvm::sys::getHostCPUFeatures()) { + features.AddFeature(feature.first(), feature.second); + } + cpuFeatures = features.getString(); + return ResolveCPUAndCPUFeaturesStatus::OK; +} + +ResolveCPUAndCPUFeaturesStatus +resolveCPUFeaturesForCPU(const llvm::Triple &triple, std::string &cpu, + std::string &cpuFeatures) { + if (!cpuFeatures.empty()) { + // Explicitly specified CPU features: not overriding. + return ResolveCPUAndCPUFeaturesStatus::OK; + } + if (cpu.empty() || cpu == "generic" || + llvm::StringRef(cpu).starts_with("generic-")) { + // Implicitly (default) or explicitly specified generic CPU: no features. + // Logging (on unspecified CPU) was already handled, no need for it here. 
+ return ResolveCPUAndCPUFeaturesStatus::OK; + } + llvm::SubtargetFeatures targetCpuFeatures(cpuFeatures); + auto addCpuFeatures = [&](const auto &getFeaturesForCPU, + auto &cpuFeatureList) { + getFeaturesForCPU(cpu, cpuFeatureList, false); + for (const auto &feature : cpuFeatureList) { + targetCpuFeatures.AddFeature(feature); + } + }; + if (triple.isX86()) { + llvm::SmallVector cpuFeatureList; + addCpuFeatures(llvm::X86::getFeaturesForCPU, cpuFeatureList); + } else if (triple.isRISCV64()) { + llvm::SmallVector cpuFeatureList; + addCpuFeatures(llvm::RISCV::getFeaturesForCPU, cpuFeatureList); + } else { + return ResolveCPUAndCPUFeaturesStatus::UnimplementedMapping; + } + cpuFeatures = targetCpuFeatures.getString(); + return ResolveCPUAndCPUFeaturesStatus::OK; +} + +void tweakCPUFeatures(const llvm::Triple &triple, std::string &cpu, + std::string &cpuFeatures) { + if (triple.isAArch64()) { + llvm::SubtargetFeatures targetCpuFeatures(cpuFeatures); + // Helper to add a feature if not already present. This check matters as + // we check for equality of features to tell whether to generate the error + // about implicitly targeting a generic CPU. NOTE(review): SubtargetFeatures::hasFlag appears to only test whether its argument starts with '+' or '-', so this guard may never add the feature - confirm against the LLVM API. + auto addFeature = [&](const char *feature) { + if (!targetCpuFeatures.hasFlag(std::string("+") + feature)) { + targetCpuFeatures.AddFeature(feature, true); + } + }; + // x18 is platform-reserved per the AArch64 procedure call specification. + addFeature("reserve-x18"); + cpuFeatures = targetCpuFeatures.getString(); + } +} + +std::string getImplicitGenericFallbackMessage(std::string_view triple_str) { + llvm::Triple triple(triple_str); + std::string msg = R"MSG( +Defaulting to targeting a generic CPU for the target architecture will result in poor performance. Please specify a target CPU and/or a target CPU feature set. If it is intended to target a generic CPU, specify "generic" as the CPU. + +This can be done in two ways: +1. With command-line flags: + --iree-llvmcpu-target-cpu=... + --iree-llvmcpu-target-cpu-features=... +2.
Within the IR: + #hal.executable.target< ... , cpu="...", cpu_features="..."> + +In the rest of this message, these fields are referred to as just `cpu` and `cpu_features`. + +Examples: + + cpu=generic + Target a generic CPU of the target architecture. The generated code will have poor performance, but will run on any CPU. + + cpu=host + Target the host CPU. The generated code will have optimal performance on the host CPU but will crash on other CPUs not supporting the same CPU features. + + cpu="name" + Target a specific CPU. This is mostly used on x86. The accepted values are the same as in Clang command lines.)MSG"; + if (triple.isX86()) { + msg += R"MSG( + List of accepted x86 CPUs: )MSG"; + llvm::SmallVector allAcceptedCpus; + llvm::X86::fillValidCPUArchList(allAcceptedCpus, /*Only64Bit=*/true); + llvm::raw_string_ostream s(msg); + llvm::interleaveComma(allAcceptedCpus, s); + msg += "\n"; + } else { + msg += R"MSG( + CAVEAT: Outside of x86, this may only set the instruction scheduling model but may not enable CPU features. That's why when targeting non-x86 CPUs, it is usually preferred to pass cpu_features, see below. +)MSG"; + } + msg += R"MSG( + cpu_features="+feature1,..." + Target a CPU supporting the comma-separated list of (+-prefixed) features. The accepted values are the same as in Clang command lines. +)MSG"; + if (triple.isAArch64()) { + msg += R"MSG( + Example: cpu_features="+dotprod,+i8mm,+bf16" +)MSG"; + } + if (triple.isRISCV()) { + msg += R"MSG( + Example: cpu_features="+m,+a,+f,+d,+c" +)MSG"; + } + return msg; +} + +} // namespace + +ResolveCPUAndCPUFeaturesStatus +resolveCPUAndCPUFeatures(std::string_view triple_str, std::string &cpu, + std::string &cpuFeatures) { + llvm::Triple triple(triple_str); + // No early-return on error status. The caller may treat these errors as + // non-fatal and will carry on with whichever `cpu` and `cpuFeatures` we + // produce.
+ auto status1 = resolveHostCPUAndCPUFeatures(cpu, cpuFeatures); + auto status2 = resolveCPUFeaturesForCPU(triple, cpu, cpuFeatures); + tweakCPUFeatures(triple, cpu, cpuFeatures); + + std::string defaultTweakedCpu; + std::string defaultTweakedCpuFeatures; + tweakCPUFeatures(triple, defaultTweakedCpu, defaultTweakedCpuFeatures); + + auto status3 = + (cpu == defaultTweakedCpu && cpuFeatures == defaultTweakedCpuFeatures) + ? ResolveCPUAndCPUFeaturesStatus::ImplicitGenericFallback + : ResolveCPUAndCPUFeaturesStatus::OK; + + // Helper to return the first non-OK status. + auto combine = [](ResolveCPUAndCPUFeaturesStatus a, + ResolveCPUAndCPUFeaturesStatus b) { + return a == ResolveCPUAndCPUFeaturesStatus::OK ? b : a; + }; + return combine(combine(status1, status2), status3); +} + +std::string getMessage(ResolveCPUAndCPUFeaturesStatus status, + std::string_view triple_str) { + switch (status) { + case ResolveCPUAndCPUFeaturesStatus::ImplicitGenericFallback: + return getImplicitGenericFallbackMessage(triple_str); + case ResolveCPUAndCPUFeaturesStatus::InconsistentHost: + return "If either CPU or CPU-features is `host`, the other must " + "be either also `host` or the default value.\n"; + case ResolveCPUAndCPUFeaturesStatus::UnimplementedMapping: + return "Resolution of CPU to CPU-features is not implemented on this " + "target architecture. Pass explicit " + "CPU-features, or implement the missing mapping.\n"; + default: + assert(false); + return ""; + } +} + +} // namespace mlir::iree_compiler::IREE::HAL diff --git a/compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h b/compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h new file mode 100644 index 000000000000..fa23e684c2ae --- /dev/null +++ b/compiler/plugins/target/LLVMCPU/ResolveCPUAndCPUFeatures.h @@ -0,0 +1,33 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_PLUGINS_TARGET_LLVMCPU_RESOLVECPUANDCPUFEATURES_H_ +#define IREE_COMPILER_PLUGINS_TARGET_LLVMCPU_RESOLVECPUANDCPUFEATURES_H_ + +#include + +namespace mlir::iree_compiler::IREE::HAL { + +enum class ResolveCPUAndCPUFeaturesStatus { + OK, + InconsistentHost, + UnimplementedMapping, + ImplicitGenericFallback +}; + +// Given an input `triple` and the input-output parameters `cpu` and +// `cpuFeatures`, which may be empty or the special "host" value, this function +// populates `cpu` and `cpuFeatures` with all the information that is known. +ResolveCPUAndCPUFeaturesStatus +resolveCPUAndCPUFeatures(std::string_view triple, std::string &cpu, + std::string &cpuFeatures); + +std::string getMessage(ResolveCPUAndCPUFeaturesStatus status, + std::string_view triple); + +} // namespace mlir::iree_compiler::IREE::HAL + +#endif // IREE_COMPILER_PLUGINS_TARGET_LLVMCPU_RESOLVECPUANDCPUFEATURES_H_ diff --git a/samples/simple_embedding/BUILD.bazel b/samples/simple_embedding/BUILD.bazel index 86b542d65e02..3f5f5edc3c41 100644 --- a/samples/simple_embedding/BUILD.bazel +++ b/samples/simple_embedding/BUILD.bazel @@ -111,6 +111,7 @@ iree_bytecode_module( flags = [ "--iree-hal-target-backends=llvm-cpu", "--iree-llvmcpu-target-triple=x86_64-pc-linux-elf", + "--iree-llvmcpu-target-cpu=generic", "--iree-llvmcpu-debug-symbols=false", "--iree-vm-bytecode-module-strip-source-map=true", "--iree-vm-emit-polyglot-zip=false", @@ -154,6 +155,7 @@ iree_bytecode_module( flags = [ "--iree-hal-target-backends=llvm-cpu", "--iree-llvmcpu-target-triple=armv7a-pc-linux-elf", + "--iree-llvmcpu-target-cpu=generic", "--iree-llvmcpu-target-float-abi=hard", "--iree-llvmcpu-debug-symbols=false", "--iree-vm-bytecode-module-strip-source-map=true", @@ -168,6 +170,7 @@ iree_bytecode_module( flags = [ "--iree-hal-target-backends=llvm-cpu", "--iree-llvmcpu-target-triple=aarch64-pc-linux-elf", + "--iree-llvmcpu-target-cpu=generic", 
"--iree-llvmcpu-debug-symbols=false", "--iree-vm-bytecode-module-strip-source-map=true", "--iree-vm-emit-polyglot-zip=false", diff --git a/samples/simple_embedding/CMakeLists.txt b/samples/simple_embedding/CMakeLists.txt index faac2c38691e..7ab24b7c5f52 100644 --- a/samples/simple_embedding/CMakeLists.txt +++ b/samples/simple_embedding/CMakeLists.txt @@ -98,6 +98,7 @@ iree_bytecode_module( FLAGS "--iree-hal-target-backends=llvm-cpu" "--iree-llvmcpu-target-triple=x86_64-pc-linux-elf" + "--iree-llvmcpu-target-cpu=generic" "--iree-llvmcpu-debug-symbols=false" "--iree-vm-bytecode-module-strip-source-map=true" "--iree-vm-emit-polyglot-zip=false" @@ -150,6 +151,7 @@ iree_bytecode_module( FLAGS "--iree-hal-target-backends=llvm-cpu" "--iree-llvmcpu-target-triple=armv7a-pc-linux-elf" + "--iree-llvmcpu-target-cpu=generic" "--iree-llvmcpu-target-float-abi=hard" "--iree-llvmcpu-debug-symbols=false" "--iree-vm-bytecode-module-strip-source-map=true" @@ -167,6 +169,7 @@ iree_bytecode_module( FLAGS "--iree-hal-target-backends=llvm-cpu" "--iree-llvmcpu-target-triple=aarch64-pc-linux-elf" + "--iree-llvmcpu-target-cpu=generic" "--iree-llvmcpu-debug-symbols=false" "--iree-vm-bytecode-module-strip-source-map=true" "--iree-vm-emit-polyglot-zip=false" diff --git a/samples/static_library/CMakeLists.txt b/samples/static_library/CMakeLists.txt index 7727bd84a60b..49c514a7de1f 100644 --- a/samples/static_library/CMakeLists.txt +++ b/samples/static_library/CMakeLists.txt @@ -14,6 +14,7 @@ endif() # Setup args for iree-compile. 
set(_COMPILE_ARGS) list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu") +list(APPEND _COMPILE_ARGS "--iree-llvmcpu-target-cpu=generic") list(APPEND _COMPILE_ARGS "--iree-llvmcpu-link-embedded=false") list(APPEND _COMPILE_ARGS "--iree-llvmcpu-link-static") list(APPEND _COMPILE_ARGS "--iree-llvmcpu-static-library-output-path=simple_mul.o") @@ -100,6 +101,7 @@ endif() set(_COMPILE_ARGS) list(APPEND _COMPILE_ARGS "--output-format=vm-c") list(APPEND _COMPILE_ARGS "--iree-hal-target-backends=llvm-cpu") +list(APPEND _COMPILE_ARGS "--iree-llvmcpu-target-cpu=generic") list(APPEND _COMPILE_ARGS "--iree-llvmcpu-link-embedded=false") list(APPEND _COMPILE_ARGS "--iree-llvmcpu-link-static") list(APPEND _COMPILE_ARGS "--iree-llvmcpu-static-library-output-path=simple_mul_c_module.o") diff --git a/tests/e2e/attention/CMakeLists.txt b/tests/e2e/attention/CMakeLists.txt index 8ab6ca947dd1..f7a199223df5 100644 --- a/tests/e2e/attention/CMakeLists.txt +++ b/tests/e2e/attention/CMakeLists.txt @@ -25,8 +25,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -50,8 +48,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -75,8 +71,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) # To distinguish between CDNA(gfx9) and RDNA3(gfx11) diff --git a/tests/e2e/convolution/BUILD.bazel b/tests/e2e/convolution/BUILD.bazel index 9847e210158f..488c5ab94d5a 100644 --- a/tests/e2e/convolution/BUILD.bazel +++ b/tests/e2e/convolution/BUILD.bazel @@ -41,7 +41,6 @@ py_binary( target_backends_and_drivers = [ ("llvm-cpu", "local-task"), ], - target_cpu_features_variants = ["default"], test_runner = "//tools/testing/e2e:iree-e2e-conv2d-test", test_type = "conv2d", ) for dtype in [ @@ -73,7 +72,6 @@ py_binary( target_backends_and_drivers = [ ("llvm-cpu", 
"local-task"), ], - target_cpu_features_variants = ["default"], test_runner = "//tools/testing/e2e:iree-e2e-conv2d-test", test_type = "conv2d", ) for dtype in [ diff --git a/tests/e2e/convolution/CMakeLists.txt b/tests/e2e/convolution/CMakeLists.txt index 8ddad849b082..977af1c020d3 100644 --- a/tests/e2e/convolution/CMakeLists.txt +++ b/tests/e2e/convolution/CMakeLists.txt @@ -31,8 +31,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -56,8 +54,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -81,8 +77,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -106,8 +100,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -131,8 +123,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -156,8 +146,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -183,8 +171,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -210,8 +196,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -237,8 +221,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -264,8 +246,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -291,8 +271,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - 
TARGET_CPU_FEATURES_VARIANTS - "default" ) iree_generated_e2e_runner_test( @@ -318,8 +296,6 @@ iree_generated_e2e_runner_test( LABELS "hostonly" "local" - TARGET_CPU_FEATURES_VARIANTS - "default" ) ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/tests/e2e/linalg/BUILD.bazel b/tests/e2e/linalg/BUILD.bazel index 791fd2879b9d..5d5fb39e6e7b 100644 --- a/tests/e2e/linalg/BUILD.bazel +++ b/tests/e2e/linalg/BUILD.bazel @@ -41,6 +41,7 @@ iree_check_single_backend_test_suite( "nowasm", ], target_backend = "llvm-cpu", + compiler_flags = ["--iree-llvmcpu-target-cpu=generic"], ) VMVX_SRCS = enforce_glob( @@ -92,6 +93,7 @@ iree_check_single_backend_test_suite( srcs = WINOGRAD_CONV_SRCS, compiler_flags = [ "--iree-preprocessing-pass-pipeline=builtin.module\\(func.func\\(iree-linalg-ext-convert-conv2d-to-winograd\\)\\)", + "--iree-llvmcpu-target-cpu=generic", ], driver = "local-task", target_backend = "llvm-cpu", diff --git a/tests/e2e/linalg/CMakeLists.txt b/tests/e2e/linalg/CMakeLists.txt index 9794387a9691..9a6cc234b316 100644 --- a/tests/e2e/linalg/CMakeLists.txt +++ b/tests/e2e/linalg/CMakeLists.txt @@ -22,6 +22,8 @@ iree_check_single_backend_test_suite( "llvm-cpu" DRIVER "local-task" + COMPILER_FLAGS + "--iree-llvmcpu-target-cpu=generic" LABELS "nowasm" ) @@ -62,6 +64,7 @@ iree_check_single_backend_test_suite( "local-task" COMPILER_FLAGS "--iree-preprocessing-pass-pipeline=builtin.module\(func.func\(iree-linalg-ext-convert-conv2d-to-winograd\)\)" + "--iree-llvmcpu-target-cpu=generic" ) iree_check_single_backend_test_suite( diff --git a/tests/e2e/linalg_ext_ops/BUILD.bazel b/tests/e2e/linalg_ext_ops/BUILD.bazel index 7e64cfe8d2b6..4c18bd492c6d 100644 --- a/tests/e2e/linalg_ext_ops/BUILD.bazel +++ b/tests/e2e/linalg_ext_ops/BUILD.bazel @@ -36,6 +36,7 @@ iree_check_single_backend_test_suite( "nowasm", ], target_backend = "llvm-cpu", + compiler_flags = ["--iree-llvmcpu-target-cpu=generic"], ) VMVX_SRCS = enforce_glob( diff --git 
a/tests/e2e/linalg_ext_ops/CMakeLists.txt b/tests/e2e/linalg_ext_ops/CMakeLists.txt index 5c36220b2957..5bc968c6e9b7 100644 --- a/tests/e2e/linalg_ext_ops/CMakeLists.txt +++ b/tests/e2e/linalg_ext_ops/CMakeLists.txt @@ -25,6 +25,8 @@ iree_check_single_backend_test_suite( "llvm-cpu" DRIVER "local-task" + COMPILER_FLAGS + "--iree-llvmcpu-target-cpu=generic" LABELS "nowasm" ) diff --git a/tests/e2e/matmul/BUILD.bazel b/tests/e2e/matmul/BUILD.bazel index 17b27573f032..635ee0cc3213 100644 --- a/tests/e2e/matmul/BUILD.bazel +++ b/tests/e2e/matmul/BUILD.bazel @@ -46,7 +46,7 @@ py_binary( target_backends_and_drivers = [ ("llvm-cpu", "local-task"), ], - target_cpu_features_variants = ["default"] + + target_cpu_features_variants = ["generic"] + # Widening matmuls fail to lower for SVE. (["arm_64:sve:+sve"] if lhs_rhs_type == acc_type else []), test_runner = "//tools/testing/e2e:iree-e2e-matmul-test", @@ -149,7 +149,7 @@ X86_64_AVX512_BF16 = X86_64_AVX512 + [ target_backends_and_drivers = [ ("llvm-cpu", "local-task"), ], - target_cpu_features_variants = ["default"] + + target_cpu_features_variants = ["generic"] + ([ "arm_64:dotprod:+dotprod", "arm_64:i8mm:+i8mm", @@ -216,7 +216,7 @@ X86_64_AVX512_BF16 = X86_64_AVX512 + [ target_backends_and_drivers = [ ("llvm-cpu", "local-task"), ], - target_cpu_features_variants = ["default"] + + target_cpu_features_variants = ["generic"] + ([ "arm_64:dotprod:+dotprod", "arm_64:i8mm:+i8mm", diff --git a/tests/e2e/matmul/CMakeLists.txt b/tests/e2e/matmul/CMakeLists.txt index df2c92c5607d..36e1255c5bfd 100644 --- a/tests/e2e/matmul/CMakeLists.txt +++ b/tests/e2e/matmul/CMakeLists.txt @@ -140,7 +140,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "arm_64:dotprod:+dotprod" "arm_64:i8mm:+i8mm" "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" @@ -168,7 +168,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + 
"generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" ) @@ -196,7 +196,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fullfp16:+fullfp16" @@ -225,7 +225,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fp16fml:+fp16fml" @@ -254,7 +254,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -284,7 +284,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -313,7 +313,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "arm_64:dotprod:+dotprod" "arm_64:i8mm:+i8mm" "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" @@ -341,7 +341,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" ) @@ -369,7 +369,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + 
"generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fullfp16:+fullfp16" @@ -398,7 +398,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fp16fml:+fp16fml" @@ -427,7 +427,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -457,7 +457,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -487,7 +487,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "arm_64:dotprod:+dotprod" "arm_64:i8mm:+i8mm" "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" @@ -516,7 +516,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" ) @@ -545,7 +545,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fullfp16:+fullfp16" @@ -575,7 +575,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" 
TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fp16fml:+fp16fml" @@ -605,7 +605,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -636,7 +636,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -666,7 +666,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "arm_64:dotprod:+dotprod" "arm_64:i8mm:+i8mm" "x86_64:avx512vnni:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512vnni" @@ -695,7 +695,7 @@ iree_generated_e2e_runner_test( LABELS TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" ) @@ -724,7 +724,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fullfp16:+fullfp16" @@ -754,7 +754,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "arm_64:fp16fml:+fp16fml" @@ -784,7 +784,7 @@ iree_generated_e2e_runner_test( 
"noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" @@ -815,7 +815,7 @@ iree_generated_e2e_runner_test( "noriscv" "nowasm" TARGET_CPU_FEATURES_VARIANTS - "default" + "generic" "x86_64:avx2:+avx,+avx2,+fma,+f16c" "x86_64:avx512:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq" "x86_64:avx512bf16:+avx,+avx2,+fma,+f16c,+avx512f,+avx512vl,+avx512cd,+avx512bw,+avx512dq,+avx512bf16" diff --git a/tests/e2e/regression/BUILD.bazel b/tests/e2e/regression/BUILD.bazel index 87fdccb53650..ab73e7c3bf5a 100644 --- a/tests/e2e/regression/BUILD.bazel +++ b/tests/e2e/regression/BUILD.bazel @@ -65,6 +65,7 @@ iree_check_single_backend_test_suite( ], compiler_flags = [ "--iree-llvmcpu-reassociate-fp-reductions=false", + "--iree-llvmcpu-target-cpu=generic", ], driver = "local-task", input_type = "stablehlo", @@ -81,6 +82,7 @@ iree_check_single_backend_test_suite( driver = "local-task", input_type = "stablehlo", target_backend = "llvm-cpu", + compiler_flags = ["--iree-llvmcpu-target-cpu=generic"], ) iree_check_single_backend_test_suite( @@ -94,6 +96,7 @@ iree_check_single_backend_test_suite( driver = "local-task", input_type = "tosa", target_backend = "llvm-cpu", + compiler_flags = ["--iree-llvmcpu-target-cpu=generic"], ) iree_check_single_backend_test_suite( @@ -160,7 +163,10 @@ iree_check_single_backend_test_suite( srcs = [ "disable_demote_f64_to_f32.mlir", ], - compiler_flags = ["-iree-input-demote-f64-to-f32=false"], + compiler_flags = [ + "--iree-input-demote-f64-to-f32=false", + "--iree-llvmcpu-target-cpu=generic" + ], driver = "local-task", target_backend = "llvm-cpu", ) @@ -172,6 +178,7 @@ iree_check_single_backend_test_suite( ], compiler_flags = [ "--iree-dispatch-creation-fuse-multi-use", + 
"--iree-llvmcpu-target-cpu=generic", ], driver = "local-task", target_backend = "llvm-cpu", diff --git a/tests/e2e/regression/CMakeLists.txt b/tests/e2e/regression/CMakeLists.txt index 06597e6c65af..3b7055970591 100644 --- a/tests/e2e/regression/CMakeLists.txt +++ b/tests/e2e/regression/CMakeLists.txt @@ -42,6 +42,7 @@ iree_check_single_backend_test_suite( "local-task" COMPILER_FLAGS "--iree-llvmcpu-reassociate-fp-reductions=false" + "--iree-llvmcpu-target-cpu=generic" INPUT_TYPE "stablehlo" ) @@ -72,6 +73,8 @@ iree_check_single_backend_test_suite( "llvm-cpu" DRIVER "local-task" + COMPILER_FLAGS + "--iree-llvmcpu-target-cpu=generic" INPUT_TYPE "stablehlo" ) @@ -85,6 +88,8 @@ iree_check_single_backend_test_suite( "llvm-cpu" DRIVER "local-task" + COMPILER_FLAGS + "--iree-llvmcpu-target-cpu=generic" INPUT_TYPE "tosa" ) @@ -209,7 +214,8 @@ iree_check_single_backend_test_suite( DRIVER "local-task" COMPILER_FLAGS - "-iree-input-demote-f64-to-f32=false" + "--iree-input-demote-f64-to-f32=false" + "--iree-llvmcpu-target-cpu=generic" ) iree_check_single_backend_test_suite( @@ -223,6 +229,7 @@ iree_check_single_backend_test_suite( "local-task" COMPILER_FLAGS "--iree-dispatch-creation-fuse-multi-use" + "--iree-llvmcpu-target-cpu=generic" ) iree_check_single_backend_test_suite( diff --git a/tests/e2e/stablehlo_ops/BUILD.bazel b/tests/e2e/stablehlo_ops/BUILD.bazel index fb9a65d470cf..ae52aefd9d13 100644 --- a/tests/e2e/stablehlo_ops/BUILD.bazel +++ b/tests/e2e/stablehlo_ops/BUILD.bazel @@ -86,6 +86,7 @@ iree_check_single_backend_test_suite( srcs = ALL_SRCS, compiler_flags = [ "--iree-input-demote-f64-to-f32", + "--iree-llvmcpu-target-cpu=generic", ], driver = "local-task", input_type = "stablehlo", @@ -103,7 +104,7 @@ iree_check_single_backend_test_suite( srcs = ALL_SRCS, compiler_flags = [ "--iree-input-demote-f64-to-f32", - "--iree-llvmcpu-target-cpu-features=host", + "--iree-llvmcpu-target-cpu=host", ], driver = "local-task", input_type = "stablehlo", diff --git 
a/tests/e2e/stablehlo_ops/CMakeLists.txt b/tests/e2e/stablehlo_ops/CMakeLists.txt index d5e67dbc4b6a..fef9f7338328 100644 --- a/tests/e2e/stablehlo_ops/CMakeLists.txt +++ b/tests/e2e/stablehlo_ops/CMakeLists.txt @@ -82,6 +82,7 @@ iree_check_single_backend_test_suite( "local-task" COMPILER_FLAGS "--iree-input-demote-f64-to-f32" + "--iree-llvmcpu-target-cpu=generic" INPUT_TYPE "stablehlo" LABELS @@ -160,7 +161,7 @@ iree_check_single_backend_test_suite( "local-task" COMPILER_FLAGS "--iree-input-demote-f64-to-f32" - "--iree-llvmcpu-target-cpu-features=host" + "--iree-llvmcpu-target-cpu=host" INPUT_TYPE "stablehlo" LABELS diff --git a/tests/e2e/tensor_ops/BUILD.bazel b/tests/e2e/tensor_ops/BUILD.bazel index 4b164e049811..11b3ac988b88 100644 --- a/tests/e2e/tensor_ops/BUILD.bazel +++ b/tests/e2e/tensor_ops/BUILD.bazel @@ -35,6 +35,7 @@ iree_check_single_backend_test_suite( srcs = ALL_SRCS, driver = "local-task", target_backend = "llvm-cpu", + compiler_flags = ["--iree-llvmcpu-target-cpu=generic"], ) iree_check_single_backend_test_suite( diff --git a/tests/e2e/tensor_ops/CMakeLists.txt b/tests/e2e/tensor_ops/CMakeLists.txt index 4a6f71851997..879629f84ef0 100644 --- a/tests/e2e/tensor_ops/CMakeLists.txt +++ b/tests/e2e/tensor_ops/CMakeLists.txt @@ -28,6 +28,8 @@ iree_check_single_backend_test_suite( "llvm-cpu" DRIVER "local-task" + COMPILER_FLAGS + "--iree-llvmcpu-target-cpu=generic" ) iree_check_single_backend_test_suite( diff --git a/tests/e2e/tosa_ops/BUILD.bazel b/tests/e2e/tosa_ops/BUILD.bazel index cf3a69048539..c74609166e9d 100644 --- a/tests/e2e/tosa_ops/BUILD.bazel +++ b/tests/e2e/tosa_ops/BUILD.bazel @@ -65,6 +65,7 @@ iree_check_single_backend_test_suite( driver = "local-task", input_type = "tosa", target_backend = "llvm-cpu", + compiler_flags = ["--iree-llvmcpu-target-cpu=generic"], ) iree_check_single_backend_test_suite( diff --git a/tests/e2e/tosa_ops/CMakeLists.txt b/tests/e2e/tosa_ops/CMakeLists.txt index 9e2837870130..8cfd56feafd4 100644 --- 
a/tests/e2e/tosa_ops/CMakeLists.txt +++ b/tests/e2e/tosa_ops/CMakeLists.txt @@ -59,6 +59,8 @@ iree_check_single_backend_test_suite( "llvm-cpu" DRIVER "local-task" + COMPILER_FLAGS + "--iree-llvmcpu-target-cpu=generic" INPUT_TYPE "tosa" )