Skip to content

Commit

Permalink
Support LargeList in embed_array_storage
Browse files Browse the repository at this point in the history
  • Loading branch information
albertvillanova committed Aug 6, 2024
1 parent f11c56d commit 27d0f94
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/datasets/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2154,6 +2154,11 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"):
return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature[0]))
if isinstance(feature, Sequence) and feature.length == -1:
return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature))
elif pa.types.is_large_list(array.type):
# feature must be LargeList(subfeature)
# Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
array_offsets = _combine_list_array_offsets_with_mask(array)
return pa.LargeListArray.from_arrays(array_offsets, _e(array.values, feature.dtype))
elif pa.types.is_fixed_size_list(array.type):
# feature must be Sequence(subfeature)
if isinstance(feature, Sequence) and feature.length > -1:
Expand Down

0 comments on commit 27d0f94

Please sign in to comment.