From bececdac927160b5c7e883736d7cc79d5699ad0a Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 15 Aug 2024 12:33:30 +0200
Subject: [PATCH] Fix args of feature docstrings (#7103)

* Fix args of feature docstrings

* Align dtype arg description in docstrings

* Align shape arg description in docstrings

* Rename FeatureConnector to Feature

* Add feature subsections in docs

* Add LargeList to Features docstring and rephrase

* Rephrase
---
 .../source/package_reference/main_classes.mdx | 12 ++++
 src/datasets/features/features.py             | 59 ++++++++++---------
 src/datasets/features/translation.py          |  4 +-
 3 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/docs/source/package_reference/main_classes.mdx b/docs/source/package_reference/main_classes.mdx
index 86257f624b4..f9700fd19be 100644
--- a/docs/source/package_reference/main_classes.mdx
+++ b/docs/source/package_reference/main_classes.mdx
@@ -211,18 +211,26 @@ Dictionary with split names as keys ('train', 'test' for example), and `Iterable
 
 [[autodoc]] datasets.Features
 
+### Scalar
+
 [[autodoc]] datasets.Value
 
 [[autodoc]] datasets.ClassLabel
 
+### Composite
+
 [[autodoc]] datasets.LargeList
 
 [[autodoc]] datasets.Sequence
 
+### Translation
+
 [[autodoc]] datasets.Translation
 
 [[autodoc]] datasets.TranslationVariableLanguages
 
+### Arrays
+
 [[autodoc]] datasets.Array2D
 
 [[autodoc]] datasets.Array3D
@@ -231,8 +239,12 @@ Dictionary with split names as keys ('train', 'test' for example), and `Iterable
 
 [[autodoc]] datasets.Array5D
 
+### Audio
+
 [[autodoc]] datasets.Audio
 
+### Image
+
 [[autodoc]] datasets.Image
 
 ## Filesystems
diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py
index dc7c0f8c850..4f0a75c4753 100644
--- a/src/datasets/features/features.py
+++ b/src/datasets/features/features.py
@@ -460,8 +460,9 @@ def cast_to_python_objects(obj: Any, only_1d_for_numpy=False, optimize_list_cast
 @dataclass
 class Value:
     """
-    The `Value` dtypes are as follows:
+    Scalar feature value of a particular data type.
 
+    The possible dtypes of `Value` are as follows:
     - `null`
     - `bool`
     - `int8`
@@ -489,6 +490,10 @@ class Value:
     - `string`
     - `large_string`
 
+    Args:
+        dtype (`str`):
+            Name of the data type.
+
     Example:
 
     ```py
@@ -546,9 +551,9 @@ class Array2D(_ArrayXD):
 
     Args:
         shape (`tuple`):
-            The size of each dimension.
+            Size of each dimension.
         dtype (`str`):
-            The value of the data type.
+            Name of the data type.
 
     Example:
 
@@ -571,9 +576,9 @@ class Array3D(_ArrayXD):
 
     Args:
         shape (`tuple`):
-            The size of each dimension.
+            Size of each dimension.
         dtype (`str`):
-            The value of the data type.
+            Name of the data type.
 
     Example:
 
@@ -596,9 +601,9 @@ class Array4D(_ArrayXD):
 
     Args:
         shape (`tuple`):
-            The size of each dimension.
+            Size of each dimension.
         dtype (`str`):
-            The value of the data type.
+            Name of the data type.
 
     Example:
 
@@ -621,9 +626,9 @@ class Array5D(_ArrayXD):
 
     Args:
         shape (`tuple`):
-            The size of each dimension.
+            Size of each dimension.
         dtype (`str`):
-            The value of the data type.
+            Name of the data type.
 
     Example:
 
@@ -1139,7 +1144,7 @@ class Sequence:
     Mostly here for compatiblity with tfds.
 
     Args:
-        feature:
+        feature ([`FeatureType`]):
             A list of features of a single type or a dictionary of types.
         length (`int`):
             Length of the sequence.
@@ -1170,7 +1175,7 @@ class LargeList:
     It is backed by `pyarrow.LargeListType`, which is like `pyarrow.ListType` but with 64-bit rather than 32-bit offsets.
 
     Args:
-        dtype:
+        dtype ([`FeatureType`]):
             Child feature data type of each item within the large list.
     """
 
@@ -1695,30 +1700,30 @@ class Features(dict):
     and values are the type of that column.
 
     `FieldType` can be one of the following:
-        - a [`~datasets.Value`] feature specifies a single typed value, e.g. `int64` or `string`.
-        - a [`~datasets.ClassLabel`] feature specifies a field with a predefined set of classes which can have labels
-          associated to them and will be stored as integers in the dataset.
-        - a python `dict` which specifies that the field is a nested field containing a mapping of sub-fields to sub-fields
-          features. It's possible to have nested fields of nested fields in an arbitrary manner.
-        - a python `list` or a [`~datasets.Sequence`] specifies that the field contains a list of objects. The python
-          `list` or [`~datasets.Sequence`] should be provided with a single sub-feature as an example of the feature
-          type hosted in this list.
+        - [`Value`] feature specifies a single value of a given data type, e.g. `int64` or `string`.
+        - [`ClassLabel`] feature specifies a predefined set of classes which can have labels associated to them and
+          will be stored as integers in the dataset.
+        - Python `dict` specifies a composite feature containing a mapping of sub-fields to sub-features.
+          It's possible to have nested fields of nested fields in an arbitrary manner.
+        - Python `list`, [`LargeList`] or [`Sequence`] specifies a composite feature containing a sequence of
+          sub-features, all of the same feature type.
 
           <Tip>
 
-           A [`~datasets.Sequence`] with a internal dictionary feature will be automatically converted into a dictionary of
+           A [`Sequence`] with an internal dictionary feature will be automatically converted into a dictionary of
            lists. This behavior is implemented to have a compatibility layer with the TensorFlow Datasets library but may be
-           un-wanted in some cases. If you don't want this behavior, you can use a python `list` instead of the
-           [`~datasets.Sequence`].
+           un-wanted in some cases. If you don't want this behavior, you can use a Python `list` or a [`LargeList`]
+           instead of the [`Sequence`].
 
           </Tip>
 
-        - a [`Array2D`], [`Array3D`], [`Array4D`] or [`Array5D`] feature for multidimensional arrays.
-        - an [`Audio`] feature to store the absolute path to an audio file or a dictionary with the relative path
+        - [`Array2D`], [`Array3D`], [`Array4D`] or [`Array5D`] feature for multidimensional arrays.
+        - [`Audio`] feature to store the absolute path to an audio file or a dictionary with the relative path
           to an audio file ("path" key) and its bytes content ("bytes" key). This feature extracts the audio data.
-        - an [`Image`] feature to store the absolute path to an image file, an `np.ndarray` object, a `PIL.Image.Image` object
-          or a dictionary with the relative path to an image file ("path" key) and its bytes content ("bytes" key). This feature extracts the image data.
-        - [`~datasets.Translation`] and [`~datasets.TranslationVariableLanguages`], the two features specific to Machine Translation.
+        - [`Image`] feature to store the absolute path to an image file, an `np.ndarray` object, a `PIL.Image.Image` object
+          or a dictionary with the relative path to an image file ("path" key) and its bytes content ("bytes" key).
+          This feature extracts the image data.
+        - [`Translation`] or [`TranslationVariableLanguages`] feature specific to Machine Translation.
     """
 
     def __init__(*args, **kwargs):
diff --git a/src/datasets/features/translation.py b/src/datasets/features/translation.py
index 1d3eb1af4bb..584bf3186c3 100644
--- a/src/datasets/features/translation.py
+++ b/src/datasets/features/translation.py
@@ -10,7 +10,7 @@
 
 @dataclass
 class Translation:
-    """`FeatureConnector` for translations with fixed languages per example.
+    """`Feature` for translations with fixed languages per example.
     Here for compatiblity with tfds.
 
     Args:
@@ -50,7 +50,7 @@ def flatten(self) -> Union["FeatureType", Dict[str, "FeatureType"]]:
 
 @dataclass
 class TranslationVariableLanguages:
-    """`FeatureConnector` for translations with variable languages per example.
+    """`Feature` for translations with variable languages per example.
     Here for compatiblity with tfds.
 
     Args: