Commit

Merge branch 'main' into cutmixtut
NicolasHug authored Jul 31, 2023
2 parents afb15d8 + 8d4e879 commit a367808
Showing 44 changed files with 791 additions and 778 deletions.
2 changes: 2 additions & 0 deletions .git-blame-ignore-revs
@@ -9,3 +9,5 @@ d367a01a18a3ae6bee13d8be3b63fd6a581ea46f
 6ca9c76adb6daf2695d603ad623a9cf1c4f4806f
 # Fix unnecessary exploded black formatting (#7709)
 a335d916db0694770e8152f41e19195de3134523
+# Renaming: `BoundingBox` -> `BoundingBoxes` (#7778)
+332bff937c6711666191880fab57fa2f23ae772e
2 changes: 1 addition & 1 deletion docs/source/datapoints.rst
@@ -15,5 +15,5 @@ see e.g. :ref:`sphx_glr_auto_examples_plot_transforms_v2_e2e.py`.
     Image
     Video
     BoundingBoxFormat
-    BoundingBox
+    BoundingBoxes
     Mask
4 changes: 2 additions & 2 deletions docs/source/transforms.rst
@@ -206,8 +206,8 @@ Miscellaneous
     v2.RandomErasing
     Lambda
     v2.Lambda
-    v2.SanitizeBoundingBox
-    v2.ClampBoundingBox
+    v2.SanitizeBoundingBoxes
+    v2.ClampBoundingBoxes
     v2.UniformTemporalSubsample

 .. _conversion_transforms:
12 changes: 6 additions & 6 deletions gallery/plot_datapoints.py
@@ -47,7 +47,7 @@
 #
 # * :class:`~torchvision.datapoints.Image`
 # * :class:`~torchvision.datapoints.Video`
-# * :class:`~torchvision.datapoints.BoundingBox`
+# * :class:`~torchvision.datapoints.BoundingBoxes`
 # * :class:`~torchvision.datapoints.Mask`
 #
 # How do I construct a datapoint?
@@ -76,10 +76,10 @@

 ########################################################################################################################
 # In general, the datapoints can also store additional metadata that complements the underlying tensor. For example,
-# :class:`~torchvision.datapoints.BoundingBox` stores the coordinate format as well as the spatial size of the
+# :class:`~torchvision.datapoints.BoundingBoxes` stores the coordinate format as well as the spatial size of the
 # corresponding image alongside the actual values:

-bounding_box = datapoints.BoundingBox(
+bounding_box = datapoints.BoundingBoxes(
     [17, 16, 344, 495], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=image.shape[-2:]
 )
 print(bounding_box)
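
For readers following along, a minimal standalone sketch of the renamed constructor (illustrative, not part of the commit: the spatial_size below is an assumed (H, W) stand-in for image.shape[-2:], and reading .format/.spatial_size assumes the datapoints metadata API shown in this diff):

from torchvision import datapoints

# Assumed (H, W) of the reference image; the gallery code uses image.shape[-2:].
boxes = datapoints.BoundingBoxes(
    [[17, 16, 344, 495]],
    format=datapoints.BoundingBoxFormat.XYXY,
    spatial_size=(512, 512),
)
print(boxes.format, boxes.spatial_size)  # the metadata travels with the tensor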
@@ -105,7 +105,7 @@ class PennFudanDataset(torch.utils.data.Dataset):
     def __getitem__(self, item):
         ...

-        target["boxes"] = datapoints.BoundingBox(
+        target["boxes"] = datapoints.BoundingBoxes(
             boxes,
             format=datapoints.BoundingBoxFormat.XYXY,
             spatial_size=F.get_spatial_size(img),
@@ -126,7 +126,7 @@ def __getitem__(self, item):

 class WrapPennFudanDataset:
     def __call__(self, img, target):
-        target["boxes"] = datapoints.BoundingBox(
+        target["boxes"] = datapoints.BoundingBoxes(
             target["boxes"],
             format=datapoints.BoundingBoxFormat.XYXY,
             spatial_size=F.get_spatial_size(img),
@@ -147,7 +147,7 @@ def get_transform(train):
 ########################################################################################################################
 # .. note::
 #
-#    If both :class:`~torchvision.datapoints.BoundingBox`'es and :class:`~torchvision.datapoints.Mask`'s are included in
+#    If both :class:`~torchvision.datapoints.BoundingBoxes`'es and :class:`~torchvision.datapoints.Mask`'s are included in
 #    the sample, ``torchvision.transforms.v2`` will transform them both. Meaning, if you don't need both, dropping or
 #    at least not wrapping the obsolete parts, can lead to a significant performance boost.
 #
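
Acting on that note, one possible sketch of dropping an unneeded part before wrapping (the DropMasks name and the "masks" key are illustrative assumptions, mirroring the wrapper above):

class DropMasks:
    """Remove masks from the target so transforms.v2 does not spend time on them."""

    def __call__(self, img, target):
        target.pop("masks", None)  # keep only what the downstream model consumes
        return img, target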
2 changes: 1 addition & 1 deletion gallery/plot_transforms_v2.py
@@ -29,7 +29,7 @@ def load_data():

     masks = datapoints.Mask(merged_masks == labels.view(-1, 1, 1))

-    bounding_boxes = datapoints.BoundingBox(
+    bounding_boxes = datapoints.BoundingBoxes(
         masks_to_boxes(masks), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=image.shape[-2:]
     )

4 changes: 2 additions & 2 deletions gallery/plot_transforms_v2_e2e.py
@@ -106,13 +106,13 @@ def load_example_coco_detection_dataset(**kwargs):
         transforms.RandomHorizontalFlip(),
         transforms.ToImageTensor(),
         transforms.ConvertImageDtype(torch.float32),
-        transforms.SanitizeBoundingBox(),
+        transforms.SanitizeBoundingBoxes(),
     ]
 )

 ########################################################################################################################
 # .. note::
-#    Although the :class:`~torchvision.transforms.v2.SanitizeBoundingBox` transform is a no-op in this example, but it
+#    Although the :class:`~torchvision.transforms.v2.SanitizeBoundingBoxes` transform is a no-op in this example, but it
 #    should be placed at least once at the end of a detection pipeline to remove degenerate bounding boxes as well as
 #    the corresponding labels and optionally masks. It is particularly critical to add it if
 #    :class:`~torchvision.transforms.v2.RandomIoUCrop` was used.
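
Restated as a hedged sketch, a v2 detection pipeline that places the sanitizer last, after RandomIoUCrop (the transform names mirror this diff; the composition itself is illustrative, not the commit's code):

import torch
from torchvision.transforms import v2 as transforms

pipeline = transforms.Compose(
    [
        transforms.RandomIoUCrop(),  # can produce degenerate boxes
        transforms.RandomHorizontalFlip(),
        transforms.ToImageTensor(),
        transforms.ConvertImageDtype(torch.float32),
        transforms.SanitizeBoundingBoxes(),  # last: drops bad boxes plus their labels/masks
    ]
)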
2 changes: 1 addition & 1 deletion references/detection/presets.py
@@ -78,7 +78,7 @@ def __init__(
         if use_v2:
             transforms += [
                 T.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.XYXY),
-                T.SanitizeBoundingBox(),
+                T.SanitizeBoundingBoxes(),
             ]

         self.transforms = T.Compose(transforms)
18 changes: 9 additions & 9 deletions test/common_utils.py
@@ -620,7 +620,7 @@ def make_image_loaders_for_interpolation(


 @dataclasses.dataclass
-class BoundingBoxLoader(TensorLoader):
+class BoundingBoxesLoader(TensorLoader):
     format: datapoints.BoundingBoxFormat
     spatial_size: Tuple[int, int]

@@ -639,25 +639,25 @@ def make_bounding_box(
            - (box[3] - box[1], box[2] - box[0]) for XYXY
            - (H, W) for XYWH and CXCYWH
        spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on
-            returned datapoints.BoundingBox
+            returned datapoints.BoundingBoxes

    To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker
    functions, e.g.

    .. code::

        image = make_image(size=size)
-        bounding_box = make_bounding_box(spatial_size=size)
-        assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image)
+        bounding_boxes = make_bounding_box(spatial_size=size)
+        assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image)

    For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all
    other maker functions, e.g.

    .. code::

        image = make_image()
-        bounding_box = make_bounding_box()
-        assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image)
+        bounding_boxes = make_bounding_box()
+        assert F.get_spatial_size(bounding_boxes) == F.get_spatial_size(image)
    """

    def sample_position(values, max_value):
@@ -679,7 +679,7 @@ def sample_position(values, max_value):
     dtype = dtype or torch.float32

     if any(dim == 0 for dim in batch_dims):
-        return datapoints.BoundingBox(
+        return datapoints.BoundingBoxes(
             torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size
         )

@@ -705,7 +705,7 @@ def sample_position(values, max_value):
     else:
         raise ValueError(f"Format {format} is not supported")

-    return datapoints.BoundingBox(
+    return datapoints.BoundingBoxes(
         torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size
     )

@@ -725,7 +725,7 @@ def fn(shape, dtype, device):
         format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device
     )

-    return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)
+    return BoundingBoxesLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size)


 def make_bounding_box_loaders(
4 changes: 2 additions & 2 deletions test/test_datapoints.py
@@ -27,7 +27,7 @@ def test_mask_instance(data):
     "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH]
 )
 def test_bbox_instance(data, format):
-    bboxes = datapoints.BoundingBox(data, format=format, spatial_size=(32, 32))
+    bboxes = datapoints.BoundingBoxes(data, format=format, spatial_size=(32, 32))
     assert isinstance(bboxes, torch.Tensor)
     assert bboxes.ndim == 2 and bboxes.shape[1] == 4
     if isinstance(format, str):
@@ -164,7 +164,7 @@ def test_wrap_like():
     [
         datapoints.Image(torch.rand(3, 16, 16)),
         datapoints.Video(torch.rand(2, 3, 16, 16)),
-        datapoints.BoundingBox([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)),
+        datapoints.BoundingBoxes([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)),
         datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)),
     ],
 )
28 changes: 14 additions & 14 deletions test/test_prototype_transforms.py
@@ -20,7 +20,7 @@

 from prototype_common_utils import make_label, make_one_hot_labels

-from torchvision.datapoints import BoundingBox, BoundingBoxFormat, Image, Mask, Video
+from torchvision.datapoints import BoundingBoxes, BoundingBoxFormat, Image, Mask, Video
 from torchvision.prototype import datapoints, transforms
 from torchvision.transforms.v2._utils import _convert_fill_arg
 from torchvision.transforms.v2.functional import InterpolationMode, pil_to_tensor, to_image_pil
@@ -101,10 +101,10 @@ def test__extract_image_targets_assertion(self, mocker):
             self.create_fake_image(mocker, Image),
             # labels, bboxes, masks
             mocker.MagicMock(spec=datapoints.Label),
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BoundingBoxes),
             mocker.MagicMock(spec=Mask),
             # labels, bboxes, masks
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BoundingBoxes),
             mocker.MagicMock(spec=Mask),
         ]

@@ -122,11 +122,11 @@ def test__extract_image_targets(self, image_type, label_type, mocker):
             self.create_fake_image(mocker, image_type),
             # labels, bboxes, masks
             mocker.MagicMock(spec=label_type),
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BoundingBoxes),
             mocker.MagicMock(spec=Mask),
             # labels, bboxes, masks
             mocker.MagicMock(spec=label_type),
-            mocker.MagicMock(spec=BoundingBox),
+            mocker.MagicMock(spec=BoundingBoxes),
             mocker.MagicMock(spec=Mask),
         ]

@@ -142,7 +142,7 @@ def test__extract_image_targets(self, image_type, label_type, mocker):

         for target in targets:
             for key, type_ in [
-                ("boxes", BoundingBox),
+                ("boxes", BoundingBoxes),
                 ("masks", Mask),
                 ("labels", label_type),
             ]:
@@ -163,7 +163,7 @@ def test__copy_paste(self, label_type):
         if label_type == datapoints.OneHotLabel:
             labels = torch.nn.functional.one_hot(labels, num_classes=5)
         target = {
-            "boxes": BoundingBox(
+            "boxes": BoundingBoxes(
                 torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", spatial_size=(32, 32)
             ),
             "masks": Mask(masks),
@@ -178,7 +178,7 @@ def test__copy_paste(self, label_type):
         if label_type == datapoints.OneHotLabel:
             paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5)
         paste_target = {
-            "boxes": BoundingBox(
+            "boxes": BoundingBoxes(
                 torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", spatial_size=(32, 32)
             ),
             "masks": Mask(paste_masks),
@@ -332,7 +332,7 @@ def test__transform_culling(self, mocker):
         assert_equal(output["masks"], masks[is_valid])
         assert_equal(output["labels"], labels[is_valid])

-    def test__transform_bounding_box_clamping(self, mocker):
+    def test__transform_bounding_boxes_clamping(self, mocker):
         batch_size = 3
         spatial_size = (10, 10)

@@ -349,15 +349,15 @@ def test__transform_bounding_box_clamping(self, mocker):
             ),
         )

-        bounding_box = make_bounding_box(
+        bounding_boxes = make_bounding_box(
             format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
-        mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")
+        mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_boxes")

         transform = transforms.FixedSizeCrop((-1, -1))
         mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True)

-        transform(bounding_box)
+        transform(bounding_boxes)

         mock.assert_called_once()

@@ -390,7 +390,7 @@ class TestPermuteDimensions:
     def test_call(self, dims, inverse_dims):
         sample = dict(
             image=make_image(),
-            bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
+            bounding_boxes=make_bounding_box(format=BoundingBoxFormat.XYXY),
             video=make_video(),
             str="str",
             int=0,
@@ -434,7 +434,7 @@ class TestTransposeDimensions:
     def test_call(self, dims):
         sample = dict(
             image=make_image(),
-            bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY),
+            bounding_boxes=make_bounding_box(format=BoundingBoxFormat.XYXY),
             video=make_video(),
             str="str",
             int=0,