Cleaning up hal.executable.variant syntax. (#15254)
This will make it possible to add more fields/regions without creating
parsing ambiguity.
benvanik authored Oct 20, 2023
1 parent a95a28a · commit 20e2112
Showing 124 changed files with 825 additions and 822 deletions.
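The pattern repeated across every hunk below: the variant's executable target moves from a trailing ", target = <attr>" attribute on hal.executable.variant into a "target(<attr>)" keyword parameter, which is what removes the parsing ambiguity for future fields/regions. A representative before/after pair, taken from the first test file:

-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {

The same rewrite applies whether the target is written inline (<"cuda", ...>) or spelled out as a full #hal.executable.target<...> attribute.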
@@ -1,7 +1,7 @@
// RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-distribute, cse)))))" %s | FileCheck %s

hal.executable private @add_tensor {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @add_tensor ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
==== next changed file (path not shown) ====
@@ -16,7 +16,7 @@
]>
]>
hal.executable private @shared_mem_cpy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @shared_mem_cpy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 4: index, 1:index]
} {
@@ -110,7 +110,7 @@ hal.executable private @shared_mem_cpy
]>

hal.executable private @unaligned_shared_memory_copy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @unaligned_shared_memory_copy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 8: index, 1:index]
} {
@@ -166,7 +166,7 @@ hal.executable private @unaligned_shared_memory_copy
]>

hal.executable private @zero_dim_shared_memory_copy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @zero_dim_shared_memory_copy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 8: index, 1:index]
} {
@@ -204,7 +204,7 @@ hal.executable private @zero_dim_shared_memory_copy
]>

hal.executable private @zero_dim_shared_memory_copy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @zero_dim_shared_memory_copy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 8: index, 1:index]
} {
==== next changed file (path not shown) ====
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-tensor-tile, cse)))))" %s | FileCheck %s

hal.executable private @add_tensor {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @add_tensor ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
@@ -61,7 +61,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb",
// -----

hal.executable private @reduction {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @reduction ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
@@ -121,7 +121,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb",
// -----

hal.executable private @reduction_broadcast {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @reduction_broadcast ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
==== next changed file (path not shown) ====
@@ -1,7 +1,7 @@
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-fold-affinemin-in-distributed-loops, canonicalize)))))' --split-input-file %s | FileCheck %s

hal.executable public @generic_static {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
hal.executable.export public @generic_static ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTransposeSharedMem>, workgroup_size = [8 : index, 32 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c128 = arith.constant 128 : index
==== next changed file (path not shown) ====
@@ -9,7 +9,7 @@

// CHECK-LABEL: func.func @dispatch_0()
hal.executable private @dispatch_0 {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @dispatch_0 layout(#pipeline_layout) attributes {
workgroup_size = [64: index, 1: index, 1:index]
} {
@@ -60,7 +60,7 @@ hal.executable private @dispatch_0
// CHECK-LABEL: func.func @workgroup_tile_loop()
#translation = #iree_codegen.translation_info<LLVMGPUDistribute>
hal.executable private @workgroup_tile_loop {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @workgroup_tile_loop layout(#pipeline_layout) attributes {
translation_info = #translation
} {
@@ -99,7 +99,7 @@ hal.executable private @workgroup_tile_loop
// CHECK-LABEL: func.func @workgroup_tile_loop_negative()
#translation = #iree_codegen.translation_info<LLVMGPUDistribute>
hal.executable private @workgroup_tile_loop_negative {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @workgroup_tile_loop_negative layout(#pipeline_layout) attributes {
translation_info = #translation
} {
@@ -140,7 +140,7 @@ hal.executable private @workgroup_tile_loop_negative
// CHECK: gpu.barrier
#translation = #iree_codegen.translation_info<LLVMGPUDistribute>
hal.executable private @both_workgroup_and_workitem {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @both_workgroup_and_workitem layout(#pipeline_layout) attributes {
translation_info = #translation,
workgroup_size = [8: index, 2: index, 1: index]
@@ -196,19 +196,17 @@ hal.executable private @both_workgroup_and_workitem

// -----

-
#config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>
#device_target_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>]}>
#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
#map0 = affine_map<()[s0] -> (s0 ceildiv 4)>
#map1 = affine_map<()[s0] -> (s0 * 4)>
#map2 = affine_map<()[s0, s1] -> (-((s0 * -4 + 4) mod (s1 * 4)) + 4)>
#map3 = affine_map<(d0)[s0] -> (d0 + s0)>
module attributes {hal.device.targets = [#device_target_cpu]} {
hal.executable private @simple_mul {
-hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c1 = arith.constant 1 : index
… (diffs for the remaining changed files not loaded in this view)
