Clean up really long line lengths in Anderson2021 (#7728)
* Clean up really long line lengths in Anderson2021

We don't have an explicit line-length limit in Halide, but generally consider 120 columns to be a reasonable extent; a lot of code in Anderson2021 went waaaay over this limit, especially function/method calls. I did a semi-manual pass to address the worst offenders (a representative sketch of the pattern follows this list). Should be 100% cosmetic.

* Add LoopNestMap

* Fixes
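
To make the kind of rewrite concrete, here is a minimal, hypothetical sketch of the pattern used throughout the diff: a constructor whose parameter list and member-initializer list previously sat on one very long line is wrapped so each item gets its own line. The types and names below are invented purely for illustration; the real examples are in the diff itself.

```cpp
#include <string>

// Hypothetical types and names, used only to illustrate the formatting
// pattern applied in this commit; this is not an excerpt from the Halide sources.
struct Params {
    int beam_size = 32;
};

struct Scheduler {
    Params params;
    std::string target;
    int seed;

    // Before (the style being cleaned up), everything on one long line:
    //   Scheduler(const Params &params, const std::string &target, int seed) : params{params}, target{target}, seed{seed} {}

    // After: one parameter and one initializer per line; behavior is unchanged.
    Scheduler(const Params &params,
              const std::string &target,
              int seed)
        : params{params},
          target{target},
          seed{seed} {
    }
};

int main() {
    Scheduler s{Params{}, "host-cuda", 42};
    return s.seed == 42 ? 0 : 1;
}
```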
steven-johnson authored Aug 2, 2023
1 parent ef24391 commit 734df3f
Showing 16 changed files with 951 additions and 390 deletions.
68 changes: 47 additions & 21 deletions src/autoschedulers/anderson2021/AutoSchedule.cpp
@@ -25,7 +25,9 @@
value of HL_DEBUG_CODEGEN, if any).
HL_PERMIT_FAILED_UNROLL
Set to 1 to tell Halide not to freak out if we try to unroll a loop that doesn't have a constant extent. Should generally not be necessary, but sometimes the autoscheduler's model for what will and will not turn into a constant during lowering is inaccurate, because Halide isn't perfect at constant-folding.
Set to 1 to tell Halide not to freak out if we try to unroll a loop that doesn't have a constant extent.
Should generally not be necessary, but sometimes the autoscheduler's model for what will and will not
turn into a constant during lowering is inaccurate, because Halide isn't perfect at constant-folding.
#ifdef HALIDE_AUTOSCHEDULER_ALLOW_CYOS
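
For context on the HL_PERMIT_FAILED_UNROLL description above: flags like this are ordinarily read from the process environment. The sketch below shows one plausible way such a check could be written; it is illustrative only and not the autoscheduler's actual implementation.

```cpp
#include <cstdlib>
#include <cstring>

// Hypothetical helper: returns true when HL_PERMIT_FAILED_UNROLL is set to "1",
// i.e. when a failed attempt to unroll a loop without a constant extent should
// not be treated as a hard error. Illustrative only.
inline bool permit_failed_unroll() {
    const char *e = std::getenv("HL_PERMIT_FAILED_UNROLL");
    return e != nullptr && std::strcmp(e, "1") == 0;
}
```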
@@ -199,7 +201,15 @@ AutoSchedule::AutoSchedule(const FunctionDAG &dag,
Statistics &stats,
SearchSpace &search_space,
const LoopNestParser *partial_schedule)
: dag{dag}, params{params}, target{target}, outputs{outputs}, rng{rng}, cost_model{cost_model}, stats{stats}, search_space{search_space}, partial_schedule{partial_schedule} {
: dag{dag},
params{params},
target{target},
outputs{outputs},
rng{rng},
cost_model{cost_model},
stats{stats},
search_space{search_space},
partial_schedule{partial_schedule} {
configure_pipeline_features(dag, params, cost_model);
}

@@ -220,27 +230,26 @@ IntrusivePtr<State> AutoSchedule::optimal_schedule_pass(int beam_size,

int expanded = 0;

std::function<void(IntrusivePtr<State> &&)> enqueue_new_children =
[&](IntrusivePtr<State> &&s) {
// aslog(1) << "\n** Generated child: ";
// s->dump();
// s->calculate_cost(dag, params, nullptr, true);
std::function<void(IntrusivePtr<State> &&)> enqueue_new_children = [&](IntrusivePtr<State> &&s) {
// aslog(1) << "\n** Generated child: ";
// s->dump();
// s->calculate_cost(dag, params, nullptr, true);

// Each child should have one more decision made than its parent state.
internal_assert(s->num_decisions_made == s->parent->num_decisions_made + 1);
// Each child should have one more decision made than its parent state.
internal_assert(s->num_decisions_made == s->parent->num_decisions_made + 1);

int progress = s->num_decisions_made * beam_size + expanded;
size_t max_progress = dag.nodes.size() * beam_size * 2;
int progress = s->num_decisions_made * beam_size + expanded;
size_t max_progress = dag.nodes.size() * beam_size * 2;

// Update the progress bar
tick.set(double(progress) / max_progress);
s->penalized = false;
// Update the progress bar
tick.set(double(progress) / max_progress);
s->penalized = false;

++stats.num_states_added;
++stats.num_states_added;

// Add the state to the list of states to evaluate
q.emplace(std::move(s));
};
// Add the state to the list of states to evaluate
q.emplace(std::move(s));
};

std::unique_ptr<LoopNestParser> target_loop_nest;

@@ -600,7 +609,15 @@ void generate_schedule(const std::vector<Function> &outputs,
std::mt19937 rng{(uint32_t)params.random_dropout_seed};
SearchSpace search_space{dag, params, target, rng, cost_model.get(), stats, partial_schedule.get()};

AutoSchedule autoschedule{dag, params, target, outputs, rng, cost_model.get(), stats, search_space, partial_schedule.get()};
AutoSchedule autoschedule{dag,
params,
target,
outputs,
rng,
cost_model.get(),
stats,
search_space,
partial_schedule.get()};

// Run beam search
optimal = autoschedule.optimal_schedule(params.beam_size);
@@ -656,7 +673,8 @@ void generate_schedule(const std::vector<Function> &outputs,
aslog(1) << "Total cost model evaluation time (ms): " << stats.total_cost_model_evaluation_time() << "\n";
aslog(1) << "Average cost model evaluation time (ms): " << stats.average_cost_model_evaluation_time() << "\n";
std::chrono::duration<double> total_time = timer.elapsed();
aslog(1) << "Time taken for autoscheduler (s): " << std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count() / 1000.0 << '\n';
aslog(1) << "Time taken for autoscheduler (s): "
<< std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count() / 1000.0 << '\n';
}

struct Anderson2021 {
@@ -717,7 +735,15 @@ void find_and_apply_schedule(FunctionDAG &dag,
}

SearchSpace search_space{dag, params, target, rng, cost_model, stats, partial_schedule.get()};
AutoSchedule autoschedule{dag, params, target, outputs, rng, cost_model, stats, search_space, partial_schedule.get()};
AutoSchedule autoschedule{dag,
params,
target,
outputs,
rng,
cost_model,
stats,
search_space,
partial_schedule.get()};

IntrusivePtr<State> optimal = autoschedule.optimal_schedule(beam_size);

24 changes: 15 additions & 9 deletions src/autoschedulers/anderson2021/DefaultCostModel.cpp
@@ -51,8 +51,7 @@ void DefaultCostModel::set_pipeline_features(const Internal::Autoscheduler::Func
const int pipeline_feat_size = head1_w * head1_h;
// We ignore the first seven pipeline features in the cost
// model. It's just a mask of which types are in use.
static_assert(sizeof(PipelineFeatures) - 7 * sizeof(int) ==
sizeof(int) * pipeline_feat_size,
static_assert(sizeof(PipelineFeatures) - 7 * sizeof(int) == sizeof(int) * pipeline_feat_size,
"Incorrect size for pipeline features");
int num_stages = 0;
for (const auto &n : dag.nodes) {
@@ -231,15 +230,22 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim
batch_id,
pipeline_feat_queue,
schedule_feat_queue,
weights.head1_filter, weights.head1_bias,
weights.head2_filter, weights.head2_bias,
weights.conv1_filter, weights.conv1_bias,
learning_rate, timestep++,
weights.head1_filter,
weights.head1_bias,
weights.head2_filter,
weights.head2_bias,
weights.conv1_filter,
weights.conv1_bias,
learning_rate,
timestep++,
fastest_idx,
true_runtimes.alias(),
head1_filter_update, head1_bias_update,
head2_filter_update, head2_bias_update,
conv1_filter_update, conv1_bias_update,
head1_filter_update,
head1_bias_update,
head2_filter_update,
head2_bias_update,
conv1_filter_update,
conv1_bias_update,
dst,
dst_costs_per_stage,
loss);
11 changes: 6 additions & 5 deletions src/autoschedulers/anderson2021/FunctionDAG.cpp
@@ -239,10 +239,10 @@ class Featurizer : public IRVisitor {
void visit_memory_access(const std::string &name, Type t, const vector<Expr> &args, PipelineFeatures::AccessType type) {
// Compute matrix of partial derivatives of args w.r.t. loop params
LoadJacobian matrix(args.size(), stage.loop.size(), 1);
vector<size_t> ones_per_row(args.size(), 0),
zeros_per_row(args.size(), 0),
ones_per_col(stage.loop.size(), 0),
zeros_per_col(stage.loop.size(), 0);
vector<size_t> ones_per_row(args.size(), 0);
vector<size_t> zeros_per_row(args.size(), 0);
vector<size_t> ones_per_col(stage.loop.size(), 0);
vector<size_t> zeros_per_col(stage.loop.size(), 0);
bool is_pointwise = args.size() == stage.loop.size();
for (size_t i = 0; i < args.size(); i++) {
for (size_t j = 0; j < stage.loop.size(); j++) {
@@ -295,7 +295,8 @@ class Featurizer : public IRVisitor {

public:
Featurizer(Function &func, FunctionDAG::Node::Stage &stage)
: func(func), stage(stage) {
: func(func),
stage(stage) {
}

void visit_store_args(const std::string &name, Type t, vector<Expr> args) {
11 changes: 8 additions & 3 deletions src/autoschedulers/anderson2021/FunctionDAG.h
@@ -39,7 +39,8 @@ struct OptionalRational {

OptionalRational() = default;
OptionalRational(int64_t n, int64_t d)
: numerator(n), denominator(d) {
: numerator(n),
denominator(d) {
}

void operator+=(const OptionalRational &other) {
@@ -137,7 +138,9 @@ class LoadJacobian {

public:
LoadJacobian(size_t producer_storage_dims, size_t consumer_loop_dims, int64_t count)
: c(count), rows(producer_storage_dims), cols(consumer_loop_dims) {
: c(count),
rows(producer_storage_dims),
cols(consumer_loop_dims) {
coeffs.resize(rows * cols);
}

@@ -283,7 +286,9 @@ class Span {
}

Span(int64_t a, int64_t b, bool c)
: min_(a), max_(b), constant_extent_(c) {
: min_(a),
max_(b),
constant_extent_(c) {
}
Span() = default;
Span(const Span &other) = default;
13 changes: 10 additions & 3 deletions src/autoschedulers/anderson2021/GPUMemInfo.h
@@ -175,7 +175,10 @@ struct Strides {

struct GlobalAccessAccumulator {
GlobalAccessAccumulator(int bytes_per_access, size_t dimensions, const Strides &strides, bool verbose)
: bytes_per_access{bytes_per_access}, dimensions{dimensions}, strides{strides}, verbose{verbose} {
: bytes_per_access{bytes_per_access},
dimensions{dimensions},
strides{strides},
verbose{verbose} {
}

void operator()(int thread_id, int x, int y, int z, int active, bool last_thread) {
@@ -257,7 +260,10 @@ struct GlobalAccessAccumulator {

struct SharedAccessAccumulator {
SharedAccessAccumulator(int bytes_per_access, size_t dimensions, const Strides &strides, bool verbose)
: bytes_per_access{bytes_per_access}, dimensions{dimensions}, strides{strides}, verbose{verbose} {
: bytes_per_access{bytes_per_access},
dimensions{dimensions},
strides{strides},
verbose{verbose} {
}

void operator()(int thread_id, int x, int y, int z, int active, bool last_thread) {
@@ -348,7 +354,8 @@ struct SharedAccessAccumulator {

struct LocalAccessAccumulator {
LocalAccessAccumulator(int bytes_per_access, bool verbose)
: bytes_per_access{bytes_per_access}, verbose{verbose} {
: bytes_per_access{bytes_per_access},
verbose{verbose} {
}

void operator()(int thread_id, int x, int y, int z, int active, bool last_thread) {