From 27d0f94ede4a537f5e5c07e5bc075c783f46bc5d Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:36:36 +0200 Subject: [PATCH] Support LargeList in embed_array_storage --- src/datasets/table.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/datasets/table.py b/src/datasets/table.py index f3e5f26baa8..a536a101619 100644 --- a/src/datasets/table.py +++ b/src/datasets/table.py @@ -2154,6 +2154,11 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"): return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature[0])) if isinstance(feature, Sequence) and feature.length == -1: return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature)) + elif pa.types.is_large_list(array.type): + # feature must be LargeList(subfeature) + # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError + array_offsets = _combine_list_array_offsets_with_mask(array) + return pa.LargeListArray.from_arrays(array_offsets, _e(array.values, feature.dtype)) elif pa.types.is_fixed_size_list(array.type): # feature must be Sequence(subfeature) if isinstance(feature, Sequence) and feature.length > -1: