Skip to content

Commit

Permalink
Start LLVM integrate integrate-llvm-20231018_6 (#15223)
Browse files Browse the repository at this point in the history
Co-authored-by: Quinn Dawkins <quinn@nod-labs.com>
Co-authored-by: MaheshRavishankar <mahesh@nod-labs.com>
  • Loading branch information
3 people authored Oct 25, 2023
1 parent e62db18 commit 28732a2
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 114 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func.func @ukernel_generic_optional_other_operands(

func.func @ukernel_generic_non_tensor_memref_outs(
%out0 : f32) -> f32 {
// expected-error @+1 {{operand #0 must be ranked tensor of any type values or memref of any type values, but got 'f32'}}
// expected-error @+1 {{operand #0 must be variadic of ranked tensor of any type values or memref of any type values, but got 'f32'}}
%0 = iree_codegen.ukernel.generic "foo"
outs(%out0 : f32) -> f32
return %0 : f32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ module attributes {hal.device.targets = [#device_target_cuda]} {
// CHECK: transform.iree.register_match_callbacks
// CHECK: %[[MATCH:.+]]:2 = transform.iree.match_callback failures(propagate) "batch_matmul"
// CHECK: %[[TILED:.+]], %[[FORALL:.+]] = transform.structured.tile_using_forall %[[MATCH]]#1
// DEFAULT: num_threads [] tile_sizes [64, 64, 1](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// OPTIONS: num_threads [] tile_sizes [128, 64, 32](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// DEFAULT: tile_sizes [64, 64, 1](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// OPTIONS: tile_sizes [128, 64, 32](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
Expand All @@ -113,39 +113,39 @@ module attributes {hal.device.targets = [#device_target_cuda]} {
// CHECK: %[[RHS:.+]] = get_producer_of_operand %[[PADDED]][1]
// CHECK: %[[RHS_DPS:.+]] = transform.structured.rewrite_in_destination_passing_style %[[RHS]]

// CHECK: transform.structured.tile_using_forall %[[LHS]]
// DEFAULT: num_threads [1, 32, 4] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [1, 64, 2] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %[[LHS]]
// DEFAULT: num_threads [1, 32, 4](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [1, 64, 2](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
// CHECK: transform.structured.match ops{["scf.if"]}
// CHECK: transform.scf.take_assumed_branch %{{.*}} take_else_branch

// CHECK: transform.structured.tile_using_forall %[[RHS_DPS]]
// DEFAULT: num_threads [8, 16, 1] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [2, 8, 8] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: apply_patterns
// CHECK: transform.structured.tile_using_forall %[[RHS_DPS]]
// DEFAULT: num_threads [8, 16, 1](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [2, 8, 8](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse

// CHECK: transform.structured.tile_using_forall
// DEFAULT: num_threads [2, 64, 1] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [1, 16, 8] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// DEFAULT: num_threads [2, 64, 1](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// OPTIONS: num_threads [1, 16, 8](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse

// CHECK: transform.structured.tile_using_forall
// DEFAULT: num_threads [1, 2, 64] tile_sizes [](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// OPTIONS: num_threads [1, 4, 32] tile_sizes [](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// CHECK: apply_patterns
// DEFAULT: num_threads [1, 2, 64](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// OPTIONS: num_threads [1, 4, 32](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse

// CHECK: %tiled_op_8, %forall_op_9 = transform.structured.tile_using_forall %[[FILL]]
// DEFAULT: num_threads [1, 2, 64] tile_sizes [](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// OPTIONS: num_threads [1, 4, 32] tile_sizes [](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// DEFAULT: num_threads [1, 2, 64](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// OPTIONS: num_threads [1, 4, 32](mapping = [#gpu.thread<z>, #gpu.thread<y>, #gpu.thread<x>])
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
Expand Down Expand Up @@ -210,8 +210,8 @@ module attributes {hal.device.targets = [#device_target_cuda]} {
// DEFAULT: factor = 2
// OPTIONS: factor = 3
// CHECK: apply_patterns
// CHECK: transform.apply_patterns.vector.transfer_to_scf max_transfer_rank = 1 full_unroll = true
// CHECK: apply_patterns
// CHECK: transform.apply_patterns.vector.transfer_to_scf full_unroll = true
// CHECK: apply_patterns
// CHECK: transform.iree.apply_licm
// CHECK: transform.iree.apply_cse
// CHECK: transform.iree.create_async_groups
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {t
// CHECK: transform.structured.convert_conv2d_to_img2col
// CHECK: get_producer_of_operand %{{.*}}[0]
// CHECK: transform.apply_patterns.iree.bubble_collapse
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [] tile_sizes [1, 128, 128](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} tile_sizes [1, 128, 128](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// CHECK: transform.structured.fuse_into_containing_op
// CHECK: transform.iree.populate_workgroup_count_region_using_num_threads_slice %{{.*}}
// CHECK: transform.structured.match ops{["linalg.fill"]}
Expand All @@ -49,10 +49,10 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {t
// CHECK: %[[LHS:.+]] = get_producer_of_operand %{{.*}}[0]
// CHECK: %[[RHS:.+]] = get_producer_of_operand %{{.*}}[1]
// CHECK: transform.structured.rewrite_in_destination_passing_style %[[LHS]]
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [32, 4] tile_sizes [](mapping = [#gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %[[RHS]] num_threads [1, 4, 32] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2] tile_sizes [](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2] tile_sizes [](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [32, 4](mapping = [#gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %[[RHS]] num_threads [1, 4, 32](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.apply_patterns.iree.fold_reshape_into_tensor_hal_interface
// CHECK: transform.apply_patterns.linalg.fold_unit_extent_dims_via_slices
// CHECK: transform.apply_patterns.vector.cast_away_vector_leading_one_dim
Expand Down Expand Up @@ -101,17 +101,17 @@ hal.executable.variant public @cuda_nvptx_fb target(<"cuda", "cuda-nvptx-fb", {t
// CHECK-LABEL: func @nhwc_convolution

// CHECK: transform.sequence failures(propagate) {
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [] tile_sizes [1, 128, 128](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} tile_sizes [1, 128, 128](mapping = [#gpu.block<z>, #gpu.block<y>, #gpu.block<x>])
// CHECK: transform.structured.pad %{{.*}} {copy_back_op = "none", pack_paddings = [0, 1, 1], pad_to_multiple_of = [1, 1, 1, 1], padding_dimensions = [0, 1, 2, 3], padding_values = [0.000000e+00 : f32, 0.000000e+00 : f32, 0.000000e+00 : f32]}
// CHECK: %[[RES:.+]] = get_producer_of_operand %{{.*}}[2]
// CHECK: transform.structured.rewrite_in_destination_passing_style %[[RES]]
// CHECK: %[[LHS:.+]] = get_producer_of_operand %{{.*}}[0]
// CHECK: %[[RHS:.+]] = get_producer_of_operand %{{.*}}[1]
// CHECK: transform.structured.rewrite_in_destination_passing_style %[[RHS]]
// CHECK: transform.structured.tile_using_forall %[[LHS]] num_threads [1, 32, 4] tile_sizes [](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [4, 32] tile_sizes [](mapping = [#gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2] tile_sizes [](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2] tile_sizes [](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.structured.tile_using_forall %[[LHS]] num_threads [1, 32, 4](mapping = [#gpu.thread<linear_dim_2>, #gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [4, 32](mapping = [#gpu.thread<linear_dim_1>, #gpu.thread<linear_dim_0>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.structured.tile_using_forall %{{.*}} num_threads [1, 2, 2](mapping = [#gpu.warp<z>, #gpu.warp<y>, #gpu.warp<x>])
// CHECK: transform.iree.map_nested_forall_to_gpu_threads %{{.*}} workgroup_dims = [64, 2, 1]


Expand Down
Loading

0 comments on commit 28732a2

Please sign in to comment.