From a1eff5ccc71dd2ba567f105701822db285f194d5 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:51:52 +0200 Subject: [PATCH] Support LargeList in generate_from_arrow_type --- src/datasets/features/features.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 75af2a3bee9..732bde1e9bb 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1447,11 +1447,14 @@ def generate_from_arrow_type(pa_type: pa.DataType) -> FeatureType: return {field.name: generate_from_arrow_type(field.type) for field in pa_type} elif isinstance(pa_type, pa.FixedSizeListType): return Sequence(feature=generate_from_arrow_type(pa_type.value_type), length=pa_type.list_size) - elif isinstance(pa_type, (pa.ListType, pa.LargeListType)): + elif isinstance(pa_type, pa.ListType): feature = generate_from_arrow_type(pa_type.value_type) if isinstance(feature, (dict, tuple, list)): return [feature] return Sequence(feature=feature) + elif isinstance(pa_type, pa.LargeListType): + dtype = generate_from_arrow_type(pa_type.value_type) + return LargeList(dtype) elif isinstance(pa_type, _ArrayXDExtensionType): array_feature = [None, None, Array2D, Array3D, Array4D, Array5D][pa_type.ndims] return array_feature(shape=pa_type.shape, dtype=pa_type.value_type)