From 84eac77f408f63df033e93e7ff6c2b1f3943ed36 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 18 Sep 2024 13:38:08 -0400 Subject: [PATCH] pack-objects: refactor path-walk delta phase Previously, the --path-walk option to 'git pack-objects' would compute deltas inline with the path-walk logic. This would make the progress indicator look like it is taking a long time to enumerate objects, and then very quickly computed deltas. Instead of computing deltas on each region of objects organized by tree, store a list of regions corresponding to these groups. These can later be pulled from the list for delta compression before doing the "global" delta search. This presents a new progress indicator that can be used in tests to verify that this stage is happening. The current implementation is not integrated with threads, but could be done in a future update. Since we do not attempt to sort objects by size until after exploring all trees, we can remove the previous change to t5530 due to a different error message appearing first. Signed-off-by: Derrick Stolee --- builtin/pack-objects.c | 81 +++++++++++++++++++++++++----------- pack-objects.h | 12 ++++++ t/t5300-pack-object.sh | 8 +++- t/t5530-upload-pack-error.sh | 6 --- 4 files changed, 74 insertions(+), 33 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 2d44dd7ac4e7c8..eb65b16d6e48d8 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3206,6 +3206,50 @@ static int should_attempt_deltas(struct object_entry *entry) return 1; } +static void find_deltas_for_region(struct object_entry *list UNUSED, + struct packing_region *region, + unsigned int *processed) +{ + struct object_entry **delta_list; + uint32_t delta_list_nr = 0; + + ALLOC_ARRAY(delta_list, region->nr); + for (uint32_t i = 0; i < region->nr; i++) { + struct object_entry *entry = to_pack.objects + region->start + i; + if (should_attempt_deltas(entry)) + delta_list[delta_list_nr++] = entry; + } + + QSORT(delta_list, delta_list_nr, type_size_sort); + find_deltas(delta_list, &delta_list_nr, window, depth, processed); + free(delta_list); +} + +static void find_deltas_by_region(struct object_entry *list, + struct packing_region *regions, + uint32_t start, uint32_t nr) +{ + unsigned int processed = 0; + uint32_t progress_nr; + + if (!nr) + return; + + progress_nr = regions[nr - 1].start + regions[nr - 1].nr; + + if (progress) + progress_state = start_progress(_("Compressing objects by path"), + progress_nr); + + while (nr--) + find_deltas_for_region(list, + ®ions[start++], + &processed); + + display_progress(progress_state, progress_nr); + stop_progress(&progress_state); +} + static void prepare_pack(int window, int depth) { struct object_entry **delta_list; @@ -3230,6 +3274,10 @@ static void prepare_pack(int window, int depth) if (!to_pack.nr_objects || !window || !depth) return; + if (path_walk) + find_deltas_by_region(to_pack.objects, to_pack.regions, + 0, to_pack.nr_regions); + ALLOC_ARRAY(delta_list, to_pack.nr_objects); nr_deltas = n = 0; @@ -4167,10 +4215,8 @@ static int add_objects_by_path(const char *path, enum object_type type, void *data) { - struct object_entry **delta_list; size_t oe_start = to_pack.nr_objects; size_t oe_end; - unsigned int sub_list_size; unsigned int *processed = data; /* @@ -4203,32 +4249,17 @@ static int add_objects_by_path(const char *path, if (oe_end == oe_start || !window) return 0; - sub_list_size = 0; - ALLOC_ARRAY(delta_list, oe_end - oe_start); + ALLOC_GROW(to_pack.regions, + to_pack.nr_regions + 1, + to_pack.nr_regions_alloc); - for (size_t i = 0; i < oe_end - oe_start; i++) { - struct object_entry *entry = to_pack.objects + oe_start + i; + to_pack.regions[to_pack.nr_regions].start = oe_start; + to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start; + to_pack.nr_regions++; - if (!should_attempt_deltas(entry)) - continue; + *processed += oids->nr; + display_progress(progress_state, *processed); - delta_list[sub_list_size++] = entry; - } - - /* - * Find delta bases among this list of objects that all match the same - * path. This causes the delta compression to be interleaved in the - * object walk, which can lead to confusing progress indicators. This is - * also incompatible with threaded delta calculations. In the future, - * consider creating a list of regions in the full to_pack.objects array - * that could be picked up by the threaded delta computation. - */ - if (sub_list_size && window) { - QSORT(delta_list, sub_list_size, type_size_sort); - find_deltas(delta_list, &sub_list_size, window, depth, processed); - } - - free(delta_list); return 0; } diff --git a/pack-objects.h b/pack-objects.h index 88360aa3e8e79d..93f05b62231500 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -118,11 +118,23 @@ struct object_entry { unsigned ext_base:1; /* delta_idx points outside packlist */ }; +/** + * A packing region is a section of the packing_data.objects array + * as given by a starting index and a number of elements. + */ +struct packing_region { + uint32_t start; + uint32_t nr; +}; + struct packing_data { struct repository *repo; struct object_entry *objects; uint32_t nr_objects, nr_alloc; + struct packing_region *regions; + uint32_t nr_regions, nr_regions_alloc; + int32_t *index; uint32_t index_size; diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 9176f1ba092c70..db345c4e997e22 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -692,7 +692,9 @@ test_expect_success '--full-name-hash and --write-bitmap-index are incompatible' # Basic "repack everything" test test_expect_success '--path-walk pack everything' ' git -C server rev-parse HEAD >in && - git -C server pack-objects --stdout --revs --path-walk out.pack && + GIT_PROGRESS_DELAY=0 git -C server pack-objects \ + --stdout --revs --path-walk --progress out.pack 2>err && + grep "Compressing objects by path" err && git -C server index-pack --stdin out.pack && + GIT_PROGRESS_DELAY=0 git -C server pack-objects \ + --thin --stdout --revs --path-walk --progress out.pack 2>err && + grep "Compressing objects by path" err && git -C server index-pack --fix-thin --stdin input && - - # The current implementation of path-walk causes a different - # error message. This will be changed by a future refactoring. - GIT_TEST_PACK_PATH_WALK=0 && - export GIT_TEST_PACK_PATH_WALK && - test_must_fail git upload-pack . /dev/null 2>output.err && test_grep "unable to read" output.err && test_grep "pack-objects died" output.err