diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 75af2a3bee9..732bde1e9bb 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1447,11 +1447,14 @@ def generate_from_arrow_type(pa_type: pa.DataType) -> FeatureType: return {field.name: generate_from_arrow_type(field.type) for field in pa_type} elif isinstance(pa_type, pa.FixedSizeListType): return Sequence(feature=generate_from_arrow_type(pa_type.value_type), length=pa_type.list_size) - elif isinstance(pa_type, (pa.ListType, pa.LargeListType)): + elif isinstance(pa_type, pa.ListType): feature = generate_from_arrow_type(pa_type.value_type) if isinstance(feature, (dict, tuple, list)): return [feature] return Sequence(feature=feature) + elif isinstance(pa_type, pa.LargeListType): + dtype = generate_from_arrow_type(pa_type.value_type) + return LargeList(dtype) elif isinstance(pa_type, _ArrayXDExtensionType): array_feature = [None, None, Array2D, Array3D, Array4D, Array5D][pa_type.ndims] return array_feature(shape=pa_type.shape, dtype=pa_type.value_type)