Cleaning up hal.executable.variant syntax. (#15254)
This will make it possible to add more fields/regions without creating
parsing ambiguity.
benvanik authored Oct 20, 2023
1 parent a95a28a · commit 20e2112
Showing 124 changed files with 825 additions and 822 deletions.
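The pattern repeated across every hunk below: the variant's executable target moves from a trailing ", target = <attr>" attribute on hal.executable.variant into a "target(<attr>)" keyword parameter, which is what removes the parsing ambiguity for future fields/regions. A representative before/after pair, taken from the first test file:

-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {

The same rewrite applies whether the target is written inline (<"cuda", ...>) or spelled out as a full #hal.executable.target<...> attribute.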
@@ -1,7 +1,7 @@
// RUN: iree-opt --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-distribute, cse)))))" %s | FileCheck %s

hal.executable private @add_tensor {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @add_tensor ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
==== next changed file (path not shown) ====
@@ -16,7 +16,7 @@
]>
]>
hal.executable private @shared_mem_cpy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @shared_mem_cpy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 4: index, 1:index]
} {
@@ -110,7 +110,7 @@ hal.executable private @shared_mem_cpy
]>

hal.executable private @unaligned_shared_memory_copy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @unaligned_shared_memory_copy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 8: index, 1:index]
} {
@@ -166,7 +166,7 @@ hal.executable private @unaligned_shared_memory_copy
]>

hal.executable private @zero_dim_shared_memory_copy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @zero_dim_shared_memory_copy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 8: index, 1:index]
} {
@@ -204,7 +204,7 @@ hal.executable private @zero_dim_shared_memory_copy
]>

hal.executable private @zero_dim_shared_memory_copy {
-hal.executable.variant @cuda, target = <"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @zero_dim_shared_memory_copy layout(#pipeline_layout) attributes {
workgroup_size = [32: index, 8: index, 1:index]
} {
==== next changed file (path not shown) ====
@@ -1,7 +1,7 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-gpu-tensor-tile, cse)))))" %s | FileCheck %s

hal.executable private @add_tensor {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @add_tensor ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
@@ -61,7 +61,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb",
// -----

hal.executable private @reduction {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @reduction ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
@@ -121,7 +121,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb",
// -----

hal.executable private @reduction_broadcast {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_60"}>) {
hal.executable.export public @reduction_broadcast ordinal(0)
layout(#hal.pipeline.layout<push_constants = 0,
sets = [<0, bindings = [<0, storage_buffer>, <1, storage_buffer>, <2, storage_buffer>]>]>)
==== next changed file (path not shown) ====
@@ -1,7 +1,7 @@
// RUN: iree-opt --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(builtin.module(func.func(iree-codegen-fold-affinemin-in-distributed-loops, canonicalize)))))' --split-input-file %s | FileCheck %s

hal.executable public @generic_static {
-hal.executable.variant public @cuda_nvptx_fb, target = <"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}> {
+hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {target_arch = "sm_80"}>) {
hal.executable.export public @generic_static ordinal(0) layout(#hal.pipeline.layout<push_constants = 0, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer>]>]>) attributes {translation_info = #iree_codegen.translation_info<LLVMGPUTransposeSharedMem>, workgroup_size = [8 : index, 32 : index, 1 : index]} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index):
%c128 = arith.constant 128 : index
==== next changed file (path not shown) ====
@@ -9,7 +9,7 @@

// CHECK-LABEL: func.func @dispatch_0()
hal.executable private @dispatch_0 {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @dispatch_0 layout(#pipeline_layout) attributes {
workgroup_size = [64: index, 1: index, 1:index]
} {
@@ -60,7 +60,7 @@ hal.executable private @dispatch_0
// CHECK-LABEL: func.func @workgroup_tile_loop()
#translation = #iree_codegen.translation_info<LLVMGPUDistribute>
hal.executable private @workgroup_tile_loop {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @workgroup_tile_loop layout(#pipeline_layout) attributes {
translation_info = #translation
} {
@@ -99,7 +99,7 @@ hal.executable private @workgroup_tile_loop
// CHECK-LABEL: func.func @workgroup_tile_loop_negative()
#translation = #iree_codegen.translation_info<LLVMGPUDistribute>
hal.executable private @workgroup_tile_loop_negative {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @workgroup_tile_loop_negative layout(#pipeline_layout) attributes {
translation_info = #translation
} {
@@ -140,7 +140,7 @@ hal.executable private @workgroup_tile_loop_negative
// CHECK: gpu.barrier
#translation = #iree_codegen.translation_info<LLVMGPUDistribute>
hal.executable private @both_workgroup_and_workitem {
-hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
+hal.executable.variant @cuda target(#hal.executable.target<"cuda", "cuda-nvptx-fb">) {
hal.executable.export @both_workgroup_and_workitem layout(#pipeline_layout) attributes {
translation_info = #translation,
workgroup_size = [8: index, 2: index, 1: index]
@@ -196,19 +196,17 @@ hal.executable private @both_workgroup_and_workitem

// -----

-
#config = #iree_codegen.lowering_config<tile_sizes = [[4], [4], [0]]>
#device_target_cpu = #hal.device.target<"llvm-cpu", {executable_targets = [#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>]}>
#pipeline_layout = #hal.pipeline.layout<push_constants = 0, sets = [#hal.descriptor_set.layout<0, bindings = [#hal.descriptor_set.binding<0, storage_buffer>, #hal.descriptor_set.binding<1, storage_buffer>, #hal.descriptor_set.binding<2, storage_buffer>]>]>
-#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>
#translation = #iree_codegen.translation_info<CPUDoubleTilingExpert>
#map0 = affine_map<()[s0] -> (s0 ceildiv 4)>
#map1 = affine_map<()[s0] -> (s0 * 4)>
#map2 = affine_map<()[s0, s1] -> (-((s0 * -4 + 4) mod (s1 * 4)) + 4)>
#map3 = affine_map<(d0)[s0] -> (d0 + s0)>
module attributes {hal.device.targets = [#device_target_cpu]} {
hal.executable private @simple_mul {
-hal.executable.variant public @embedded_elf_x86_64, target = #executable_target_embedded_elf_x86_64_ {
+hal.executable.variant public @embedded_elf_x86_64 target(#hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 16 : index, target_triple = "x86_64-none-elf"}>) {
hal.executable.export public @simple_mul ordinal(0) layout(#pipeline_layout) attributes {translation_info = #translation} {
^bb0(%arg0: !hal.device, %arg1: index, %arg2: index, %arg3: index):
%c1 = arith.constant 1 : index
… (diffs for the remaining changed files not loaded in this view)
