Clean up really long line lengths in Anderson2021 (#7728)
* Clean up really long line lengths in Anderson2021

We don't have an explicit line-length limit in Halide, but generally consider 120 columns to be a reasonable extent; a lot of code in Anderson2021 went waaaay over this limit, especially function/method calls. I did a semi-manual pass to address the worst offenders (a representative sketch of the pattern follows this list). Should be 100% cosmetic.

* Add LoopNestMap

* Fixes
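
To make the kind of rewrite concrete, here is a minimal, hypothetical sketch of the pattern used throughout the diff: a constructor whose parameter list and member-initializer list previously sat on one very long line is wrapped so each item gets its own line. The types and names below are invented purely for illustration; the real examples are in the diff itself.

```cpp
#include <string>

// Hypothetical types and names, used only to illustrate the formatting
// pattern applied in this commit; this is not an excerpt from the Halide sources.
struct Params {
    int beam_size = 32;
};

struct Scheduler {
    Params params;
    std::string target;
    int seed;

    // Before (the style being cleaned up), everything on one long line:
    //   Scheduler(const Params &params, const std::string &target, int seed) : params{params}, target{target}, seed{seed} {}

    // After: one parameter and one initializer per line; behavior is unchanged.
    Scheduler(const Params &params,
              const std::string &target,
              int seed)
        : params{params},
          target{target},
          seed{seed} {
    }
};

int main() {
    Scheduler s{Params{}, "host-cuda", 42};
    return s.seed == 42 ? 0 : 1;
}
```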
steven-johnson authored Aug 2, 2023
1 parent ef24391 commit 734df3f
Showing 16 changed files with 951 additions and 390 deletions.
68 changes: 47 additions & 21 deletions src/autoschedulers/anderson2021/AutoSchedule.cpp
@@ -25,7 +25,9 @@
value of HL_DEBUG_CODEGEN, if any).
HL_PERMIT_FAILED_UNROLL
Set to 1 to tell Halide not to freak out if we try to unroll a loop that doesn't have a constant extent. Should generally not be necessary, but sometimes the autoscheduler's model for what will and will not turn into a constant during lowering is inaccurate, because Halide isn't perfect at constant-folding.
Set to 1 to tell Halide not to freak out if we try to unroll a loop that doesn't have a constant extent.
Should generally not be necessary, but sometimes the autoscheduler's model for what will and will not
turn into a constant during lowering is inaccurate, because Halide isn't perfect at constant-folding.
#ifdef HALIDE_AUTOSCHEDULER_ALLOW_CYOS
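
For context on the HL_PERMIT_FAILED_UNROLL description above: flags like this are ordinarily read from the process environment. The sketch below shows one plausible way such a check could be written; it is illustrative only and not the autoscheduler's actual implementation.

```cpp
#include <cstdlib>
#include <cstring>

// Hypothetical helper: returns true when HL_PERMIT_FAILED_UNROLL is set to "1",
// i.e. when a failed attempt to unroll a loop without a constant extent should
// not be treated as a hard error. Illustrative only.
inline bool permit_failed_unroll() {
    const char *e = std::getenv("HL_PERMIT_FAILED_UNROLL");
    return e != nullptr && std::strcmp(e, "1") == 0;
}
```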
@@ -199,7 +201,15 @@ AutoSchedule::AutoSchedule(const FunctionDAG &dag,
Statistics &stats,
SearchSpace &search_space,
const LoopNestParser *partial_schedule)
: dag{dag}, params{params}, target{target}, outputs{outputs}, rng{rng}, cost_model{cost_model}, stats{stats}, search_space{search_space}, partial_schedule{partial_schedule} {
: dag{dag},
params{params},
target{target},
outputs{outputs},
rng{rng},
cost_model{cost_model},
stats{stats},
search_space{search_space},
partial_schedule{partial_schedule} {
configure_pipeline_features(dag, params, cost_model);
}

@@ -220,27 +230,26 @@ IntrusivePtr<State> AutoSchedule::optimal_schedule_pass(int beam_size,

int expanded = 0;

std::function<void(IntrusivePtr<State> &&)> enqueue_new_children =
[&](IntrusivePtr<State> &&s) {
// aslog(1) << "\n** Generated child: ";
// s->dump();
// s->calculate_cost(dag, params, nullptr, true);
std::function<void(IntrusivePtr<State> &&)> enqueue_new_children = [&](IntrusivePtr<State> &&s) {
// aslog(1) << "\n** Generated child: ";
// s->dump();
// s->calculate_cost(dag, params, nullptr, true);

// Each child should have one more decision made than its parent state.
internal_assert(s->num_decisions_made == s->parent->num_decisions_made + 1);
// Each child should have one more decision made than its parent state.
internal_assert(s->num_decisions_made == s->parent->num_decisions_made + 1);

int progress = s->num_decisions_made * beam_size + expanded;
size_t max_progress = dag.nodes.size() * beam_size * 2;
int progress = s->num_decisions_made * beam_size + expanded;
size_t max_progress = dag.nodes.size() * beam_size * 2;

// Update the progress bar
tick.set(double(progress) / max_progress);
s->penalized = false;
// Update the progress bar
tick.set(double(progress) / max_progress);
s->penalized = false;

++stats.num_states_added;
++stats.num_states_added;

// Add the state to the list of states to evaluate
q.emplace(std::move(s));
};
// Add the state to the list of states to evaluate
q.emplace(std::move(s));
};

std::unique_ptr<LoopNestParser> target_loop_nest;

@@ -600,7 +609,15 @@ void generate_schedule(const std::vector<Function> &outputs,
std::mt19937 rng{(uint32_t)params.random_dropout_seed};
SearchSpace search_space{dag, params, target, rng, cost_model.get(), stats, partial_schedule.get()};

AutoSchedule autoschedule{dag, params, target, outputs, rng, cost_model.get(), stats, search_space, partial_schedule.get()};
AutoSchedule autoschedule{dag,
params,
target,
outputs,
rng,
cost_model.get(),
stats,
search_space,
partial_schedule.get()};

// Run beam search
optimal = autoschedule.optimal_schedule(params.beam_size);
@@ -656,7 +673,8 @@ void generate_schedule(const std::vector<Function> &outputs,
aslog(1) << "Total cost model evaluation time (ms): " << stats.total_cost_model_evaluation_time() << "\n";
aslog(1) << "Average cost model evaluation time (ms): " << stats.average_cost_model_evaluation_time() << "\n";
std::chrono::duration<double> total_time = timer.elapsed();
aslog(1) << "Time taken for autoscheduler (s): " << std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count() / 1000.0 << '\n';
aslog(1) << "Time taken for autoscheduler (s): "
<< std::chrono::duration_cast<std::chrono::milliseconds>(total_time).count() / 1000.0 << '\n';
}

struct Anderson2021 {
@@ -717,7 +735,15 @@ void find_and_apply_schedule(FunctionDAG &dag,
}

SearchSpace search_space{dag, params, target, rng, cost_model, stats, partial_schedule.get()};
AutoSchedule autoschedule{dag, params, target, outputs, rng, cost_model, stats, search_space, partial_schedule.get()};
AutoSchedule autoschedule{dag,
params,
target,
outputs,
rng,
cost_model,
stats,
search_space,
partial_schedule.get()};

IntrusivePtr<State> optimal = autoschedule.optimal_schedule(beam_size);

24 changes: 15 additions & 9 deletions src/autoschedulers/anderson2021/DefaultCostModel.cpp
@@ -51,8 +51,7 @@ void DefaultCostModel::set_pipeline_features(const Internal::Autoscheduler::Func
const int pipeline_feat_size = head1_w * head1_h;
// We ignore the first seven pipeline features in the cost
// model. It's just a mask of which types are in use.
static_assert(sizeof(PipelineFeatures) - 7 * sizeof(int) ==
sizeof(int) * pipeline_feat_size,
static_assert(sizeof(PipelineFeatures) - 7 * sizeof(int) == sizeof(int) * pipeline_feat_size,
"Incorrect size for pipeline features");
int num_stages = 0;
for (const auto &n : dag.nodes) {
@@ -231,15 +230,22 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim
batch_id,
pipeline_feat_queue,
schedule_feat_queue,
weights.head1_filter, weights.head1_bias,
weights.head2_filter, weights.head2_bias,
weights.conv1_filter, weights.conv1_bias,
learning_rate, timestep++,
weights.head1_filter,
weights.head1_bias,
weights.head2_filter,
weights.head2_bias,
weights.conv1_filter,
weights.conv1_bias,
learning_rate,
timestep++,
fastest_idx,
true_runtimes.alias(),
head1_filter_update, head1_bias_update,
head2_filter_update, head2_bias_update,
conv1_filter_update, conv1_bias_update,
head1_filter_update,
head1_bias_update,
head2_filter_update,
head2_bias_update,
conv1_filter_update,
conv1_bias_update,
dst,
dst_costs_per_stage,
loss);
11 changes: 6 additions & 5 deletions src/autoschedulers/anderson2021/FunctionDAG.cpp
@@ -239,10 +239,10 @@ class Featurizer : public IRVisitor {
void visit_memory_access(const std::string &name, Type t, const vector<Expr> &args, PipelineFeatures::AccessType type) {
// Compute matrix of partial derivatives of args w.r.t. loop params
LoadJacobian matrix(args.size(), stage.loop.size(), 1);
vector<size_t> ones_per_row(args.size(), 0),
zeros_per_row(args.size(), 0),
ones_per_col(stage.loop.size(), 0),
zeros_per_col(stage.loop.size(), 0);
vector<size_t> ones_per_row(args.size(), 0);
vector<size_t> zeros_per_row(args.size(), 0);
vector<size_t> ones_per_col(stage.loop.size(), 0);
vector<size_t> zeros_per_col(stage.loop.size(), 0);
bool is_pointwise = args.size() == stage.loop.size();
for (size_t i = 0; i < args.size(); i++) {
for (size_t j = 0; j < stage.loop.size(); j++) {
@@ -295,7 +295,8 @@ class Featurizer : public IRVisitor {

public:
Featurizer(Function &func, FunctionDAG::Node::Stage &stage)
: func(func), stage(stage) {
: func(func),
stage(stage) {
}

void visit_store_args(const std::string &name, Type t, vector<Expr> args) {
11 changes: 8 additions & 3 deletions src/autoschedulers/anderson2021/FunctionDAG.h
@@ -39,7 +39,8 @@ struct OptionalRational {

OptionalRational() = default;
OptionalRational(int64_t n, int64_t d)
: numerator(n), denominator(d) {
: numerator(n),
denominator(d) {
}

void operator+=(const OptionalRational &other) {
@@ -137,7 +138,9 @@ class LoadJacobian {

public:
LoadJacobian(size_t producer_storage_dims, size_t consumer_loop_dims, int64_t count)
: c(count), rows(producer_storage_dims), cols(consumer_loop_dims) {
: c(count),
rows(producer_storage_dims),
cols(consumer_loop_dims) {
coeffs.resize(rows * cols);
}

@@ -283,7 +286,9 @@ class Span {
}

Span(int64_t a, int64_t b, bool c)
: min_(a), max_(b), constant_extent_(c) {
: min_(a),
max_(b),
constant_extent_(c) {
}
Span() = default;
Span(const Span &other) = default;
13 changes: 10 additions & 3 deletions src/autoschedulers/anderson2021/GPUMemInfo.h
@@ -175,7 +175,10 @@ struct Strides {

struct GlobalAccessAccumulator {
GlobalAccessAccumulator(int bytes_per_access, size_t dimensions, const Strides &strides, bool verbose)
: bytes_per_access{bytes_per_access}, dimensions{dimensions}, strides{strides}, verbose{verbose} {
: bytes_per_access{bytes_per_access},
dimensions{dimensions},
strides{strides},
verbose{verbose} {
}

void operator()(int thread_id, int x, int y, int z, int active, bool last_thread) {
@@ -257,7 +260,10 @@ struct GlobalAccessAccumulator {

struct SharedAccessAccumulator {
SharedAccessAccumulator(int bytes_per_access, size_t dimensions, const Strides &strides, bool verbose)
: bytes_per_access{bytes_per_access}, dimensions{dimensions}, strides{strides}, verbose{verbose} {
: bytes_per_access{bytes_per_access},
dimensions{dimensions},
strides{strides},
verbose{verbose} {
}

void operator()(int thread_id, int x, int y, int z, int active, bool last_thread) {
@@ -348,7 +354,8 @@ struct SharedAccessAccumulator {

struct LocalAccessAccumulator {
LocalAccessAccumulator(int bytes_per_access, bool verbose)
: bytes_per_access{bytes_per_access}, verbose{verbose} {
: bytes_per_access{bytes_per_access},
verbose{verbose} {
}

void operator()(int thread_id, int x, int y, int z, int active, bool last_thread) {