Inform Mullapudi2016's grouping algo of the gpu_blocks() dims.
By design, a Stage cannot "compute_at" another stage's outer
gpu_blocks() dimensions. Inform the auto-grouping algorithm of all outer
dimensions by appending Vars to the lists: inner_dims and outer_dims.
Make can_parallel() and can_vectorize() optional.
antonysigma committed Aug 29, 2023
1 parent 36040d5 commit a36d902
Showing 1 changed file with 20 additions and 9 deletions.
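
The core of the change is a dedup check: GPUTilingDedup previously discarded the return value of std::map::try_emplace, so a repeated split request for the same variable was silently ignored by the map while the function still handed a split entry back to the caller. The diff below makes the rejection visible to callers through std::optional and bool return values. Here is a minimal, self-contained sketch of that pattern; the names (TilingDedup, Split, can_split) are invented for illustration, and only the try_emplace technique mirrors the actual change.

// Standalone sketch of the dedup pattern introduced in GPUTilingDedup below.
// TilingDedup, Split, and can_split are illustrative stand-ins, not Halide code.
#include <iostream>
#include <map>
#include <optional>
#include <string>

struct Split {
    std::string outer, inner;
    int factor;
};

class TilingDedup {
    std::map<std::string, Split> splits;

public:
    // Record a split request; reject it if the dimension was already split.
    std::optional<Split> can_split(const std::string &var, int factor) {
        Split entry{var + "_o", var + "_i", factor};
        const auto [pos, inserted] = splits.try_emplace(var, entry);
        if (!inserted) {
            // A split for `var` already exists; tell the caller to skip it.
            return std::nullopt;
        }
        return entry;
    }
};

int main() {
    TilingDedup dedup;
    std::cout << std::boolalpha
              << dedup.can_split("x", 16).has_value() << "\n"   // true: first request accepted
              << dedup.can_split("x", 32).has_value() << "\n";  // false: duplicate rejected
}
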
src/autoschedulers/mullapudi2016/AutoSchedule.cpp (20 additions, 9 deletions)

@@ -1154,7 +1154,11 @@ class GPUTilingDedup {
         VarOrRVar inner{var + "_i", v.is_rvar};

         split_t entry{v, outer, inner, factor, TailStrategy::Auto};
-        parallelize.try_emplace(var, entry);
+        const auto [_, insertion_happened] = parallelize.try_emplace(var, entry);
+        if (!insertion_happened) {
+            return std::nullopt;
+        }
+
         return entry;
     }
@@ -1163,15 +1167,16 @@ class GPUTilingDedup {
      * @param[in] vo split into outer dimension
      * @param[in] vi split into inner dimension
      * @param[in] factor the partition size.
+     * @return whether the vectorize() request is accepted or rejected.
      */
-    void can_vectorize(const VarOrRVar &v, const VarOrRVar &vo, const VarOrRVar &vi, const Expr &factor) {
+    bool can_vectorize(const VarOrRVar &v, const VarOrRVar &vo, const VarOrRVar &vi, const Expr &factor) {
         const auto &var = v.name();

         if (is_inner(var)) {
             // For CPU, it makes sense to further split the inner loop and run
             // SIMD instruction. But this operation is redundant in GPU as the
             // gpu_block is already specified.
-            return;
+            return false;
         }

         debug(2) << f.name() << ".vectorize(" << v.name() << "," << factor << ")\n";
@@ -1180,10 +1185,11 @@ class GPUTilingDedup {
             // vectorized dimension is treated as a thread in GPU. No need to
             // further split it to match the natural_vector_size() of CPUs.
             inner_vars.emplace(v.name());
-            return;
+            return false;
         }

         parallelize.try_emplace(var, split_t{v, vo, vi, factor, TailStrategy::Auto});
+        return true;
     }

     /** Mark the current dimension is already split by Mullapudi2016's
@@ -2880,11 +2886,11 @@ std::optional<pair<VarOrRVar, VarOrRVar>> Partitioner::vectorize_stage(const Gro
     internal_assert(is_rvar == dims[vec_dim_index].is_rvar());

     VarOrRVar vec_var(vec_dim_name, is_rvar);
-    auto [inner, outer] = [&]() -> std::pair<VarOrRVar, VarOrRVar> {
+    auto [inner, outer, accepted] = [&]() -> std::tuple<VarOrRVar, VarOrRVar, bool> {
         if (t.has_gpu_feature()) {
             VarOrRVar inner{vec_var.name() + "_vi", vec_var.is_rvar}, outer{vec_var.name() + "_vo", vec_var.is_rvar};
-            gpu_tiling.can_vectorize(vec_var, outer, inner, vec_len);
-            return {inner, outer};
+            const bool accepted = gpu_tiling.can_vectorize(vec_var, outer, inner, vec_len);
+            return {inner, outer, accepted};
         }

         auto split_vars = split_dim(g, f_handle, stage_num, def, is_group_output, vec_var, vec_len,
@@ -2894,7 +2900,7 @@ std::optional<pair<VarOrRVar, VarOrRVar>> Partitioner::vectorize_stage(const Gro
         sched.push_schedule(f_handle.name(), stage_num,
                             "vectorize(" + split_vars.first.name() + ")",
                             {split_vars.first.name()});
-        return split_vars;
+        return std::make_tuple(split_vars.first, split_vars.second, true);
     }();

     if (is_rvar) {
@@ -2912,6 +2918,10 @@ std::optional<pair<VarOrRVar, VarOrRVar>> Partitioner::vectorize_stage(const Gro
                  << "\" in function \"" << f_handle.name() << "\"\n";
     }

+    if (!accepted) {
+        return std::nullopt;
+    }
+
     return make_pair(inner, outer);
 }
@@ -3284,7 +3294,8 @@ void Partitioner::generate_group_cpu_schedule(
     }

     // Find the level at which group members will be computed.
-    int tile_inner_index = dims.size() - outer_dims.size() - 1;
+    internal_assert(dims.size() > outer_dims.size());
+    const auto tile_inner_index = dims.size() - outer_dims.size() - 1;
     VarOrRVar tile_inner_var(Var::outermost());
     if (!outer_dims.empty()) {
         string var_name = get_base_name(dims[tile_inner_index].var);
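
The last hunk adds a precondition check before computing tile_inner_index. A plausible reading, not spelled out in the commit message: dims.size() and outer_dims.size() are unsigned, so the subtraction wraps around to a huge index whenever outer_dims is not strictly smaller than dims, and dims[tile_inner_index] would then read out of bounds. The sketch below reproduces the wraparound with stand-in vectors.

// Sketch of the unsigned-subtraction hazard that the added internal_assert
// guards against. The vectors stand in for `dims` and `outer_dims`; the
// arithmetic mirrors `dims.size() - outer_dims.size() - 1`.
#include <iostream>
#include <vector>

int main() {
    std::vector<int> dims{1, 2, 3};
    std::vector<int> outer_dims{1, 2, 3};  // same size as dims

    // size_t arithmetic wraps: 3 - 3 - 1 == SIZE_MAX, not -1.
    const auto tile_inner_index = dims.size() - outer_dims.size() - 1;
    std::cout << tile_inner_index << "\n";  // prints 18446744073709551615 on 64-bit

    // Hence the precondition asserted in the patch:
    // internal_assert(dims.size() > outer_dims.size());
}
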
