From 6155808fd8ed4f6af2ed5873c1ca9551e820c63c Mon Sep 17 00:00:00 2001
From: vfdev
Date: Thu, 23 Jun 2022 16:27:56 +0200
Subject: [PATCH] [proto] Improvements for functional API and tests (#6187)

* Added base tests for rotate_image_tensor

* Updated resize_image_tensor API and tests and fixed a bug with max_size

* Refactored and modified private api for resize functional op

* Fixed failures

* More updates

* Updated proto functional op: resize_image_*

* Added max_size arg to resize_bounding_box and updated basic tests

* Update functional.py

* Reverted fill/center order for rotate

Other nits
---
 test/test_prototype_transforms_functional.py | 56 ++++++++++++++++---
 .../transforms/functional/_geometry.py       | 27 +++++----
 2 files changed, 66 insertions(+), 17 deletions(-)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index be3932a8b7f..30d9b833ec8 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -201,32 +201,58 @@ def horizontal_flip_bounding_box():
 
 @register_kernel_info_from_sample_inputs_fn
 def resize_image_tensor():
-    for image, interpolation in itertools.product(
+    for image, interpolation, max_size, antialias in itertools.product(
         make_images(),
-        [
-            F.InterpolationMode.BILINEAR,
-            F.InterpolationMode.NEAREST,
-        ],
+        [F.InterpolationMode.BILINEAR, F.InterpolationMode.NEAREST],  # interpolation
+        [None, 34],  # max_size
+        [False, True],  # antialias
     ):
+
+        if antialias and interpolation == F.InterpolationMode.NEAREST:
+            continue
+
         height, width = image.shape[-2:]
         for size in [
             (height, width),
             (int(height * 0.75), int(width * 1.25)),
         ]:
-            yield SampleInput(image, size=size, interpolation=interpolation)
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(image, size=size, interpolation=interpolation, max_size=max_size, antialias=antialias)
 
 
 @register_kernel_info_from_sample_inputs_fn
 def resize_bounding_box():
-    for bounding_box in make_bounding_boxes():
+    for bounding_box, max_size in itertools.product(
+        make_bounding_boxes(),
+        [None, 34],  # max_size
+    ):
         height, width = bounding_box.image_size
         for size in [
             (height, width),
             (int(height * 0.75), int(width * 1.25)),
         ]:
+            if max_size is not None:
+                size = [size[0]]
             yield SampleInput(bounding_box, size=size, image_size=bounding_box.image_size)
 
 
+@register_kernel_info_from_sample_inputs_fn
+def resize_segmentation_mask():
+    for mask, max_size in itertools.product(
+        make_segmentation_masks(),
+        [None, 34],  # max_size
+    ):
+        height, width = mask.shape[-2:]
+        for size in [
+            (height, width),
+            (int(height * 0.75), int(width * 1.25)),
+        ]:
+            if max_size is not None:
+                size = [size[0]]
+            yield SampleInput(mask, size=size, max_size=max_size)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def affine_image_tensor():
     for image, angle, translate, scale, shear in itertools.product(
@@ -284,6 +310,22 @@ def affine_segmentation_mask():
         )
 
 
+@register_kernel_info_from_sample_inputs_fn
+def rotate_image_tensor():
+    for image, angle, expand, center, fill in itertools.product(
+        make_images(extra_dims=((), (4,))),
+        [-87, 15, 90],  # angle
+        [True, False],  # expand
+        [None, [12, 23]],  # center
+        [None, [128]],  # fill
+    ):
+        if center is not None and expand:
+            # Skip warning: The provided center argument is ignored if expand is True
+            continue
+
+        yield SampleInput(image, angle=angle, expand=expand, center=center, fill=fill)
+
+
 @register_kernel_info_from_sample_inputs_fn
 def rotate_bounding_box():
     for bounding_box, angle, expand, center in itertools.product(
diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py
index f1d51fded82..95e094ad798 100644
--- a/torchvision/prototype/transforms/functional/_geometry.py
+++ b/torchvision/prototype/transforms/functional/_geometry.py
@@ -6,7 +6,12 @@
 import torch
 from torchvision.prototype import features
 from torchvision.transforms import functional_tensor as _FT, functional_pil as _FP
-from torchvision.transforms.functional import pil_modes_mapping, _get_inverse_affine_matrix, InterpolationMode
+from torchvision.transforms.functional import (
+    pil_modes_mapping,
+    _get_inverse_affine_matrix,
+    InterpolationMode,
+    _compute_output_size,
+)
 
 from ._meta import convert_bounding_box_format, get_dimensions_image_tensor, get_dimensions_image_pil
 
@@ -42,14 +47,12 @@ def resize_image_tensor(
     max_size: Optional[int] = None,
     antialias: Optional[bool] = None,
 ) -> torch.Tensor:
-    # TODO: use _compute_output_size to enable max_size option
-    max_size  # ununsed right now
-    new_height, new_width = size
     num_channels, old_height, old_width = get_dimensions_image_tensor(image)
+    new_height, new_width = _compute_output_size((old_height, old_width), size=size, max_size=max_size)
     batch_shape = image.shape[:-3]
     return _FT.resize(
         image.reshape((-1, num_channels, old_height, old_width)),
-        size=size,
+        size=[new_height, new_width],
         interpolation=interpolation.value,
         antialias=antialias,
     ).reshape(batch_shape + (num_channels, new_height, new_width))
@@ -61,8 +64,11 @@ def resize_image_pil(
     interpolation: InterpolationMode = InterpolationMode.BILINEAR,
     max_size: Optional[int] = None,
 ) -> PIL.Image.Image:
-    # TODO: use _compute_output_size to enable max_size option
-    max_size  # ununsed right now
+    if isinstance(size, int):
+        size = [size, size]
+    # Explicitly cast size to list otherwise mypy issue: incompatible type "Sequence[int]"; expected "List[int]"
+    size: List[int] = list(size)
+    size = _compute_output_size(img.size[::-1], size=size, max_size=max_size)
     return _FP.resize(img, size, interpolation=pil_modes_mapping[interpolation])
 
 
@@ -72,10 +78,11 @@ def resize_segmentation_mask(
     return resize_image_tensor(segmentation_mask, size=size, interpolation=InterpolationMode.NEAREST, max_size=max_size)
 
 
-# TODO: handle max_size
-def resize_bounding_box(bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int]) -> torch.Tensor:
+def resize_bounding_box(
+    bounding_box: torch.Tensor, size: List[int], image_size: Tuple[int, int], max_size: Optional[int] = None
+) -> torch.Tensor:
     old_height, old_width = image_size
-    new_height, new_width = size
+    new_height, new_width = _compute_output_size(image_size, size=size, max_size=max_size)
     ratios = torch.tensor((new_width / old_width, new_height / old_height), device=bounding_box.device)
     return bounding_box.view(-1, 2, 2).mul(ratios).view(bounding_box.shape)
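
Note on the size/max_size combination exercised above (an illustrative sketch, not part of the patch): when size is a single value, the smaller image edge is matched to it while keeping the aspect ratio, and max_size then caps the longer edge, rescaling both edges if that cap would be exceeded. That is also why the tests switch to a one-element size whenever max_size is set. The helper below, approx_output_size, is a hypothetical standalone approximation of what _compute_output_size is expected to return, written only for illustration; it is not the torchvision implementation.

# Illustrative sketch only (not part of the patch). approx_output_size is a
# hypothetical helper approximating the size/max_size behaviour expected from
# torchvision.transforms.functional._compute_output_size.
from typing import Optional, Tuple


def approx_output_size(image_size: Tuple[int, int], size: int, max_size: Optional[int] = None) -> Tuple[int, int]:
    height, width = image_size
    short, long = (width, height) if width <= height else (height, width)

    # Match the smaller edge to `size`, preserving the aspect ratio.
    new_short, new_long = size, int(size * long / short)

    # If the longer edge would exceed max_size, rescale so it fits exactly.
    if max_size is not None and new_long > max_size:
        new_short, new_long = int(max_size * new_short / new_long), max_size

    return (new_long, new_short) if width <= height else (new_short, new_long)


# A 100x200 (height x width) image with size=50 and max_size=80: the smaller edge
# maps to 50, the longer edge would become 100 > 80, so both are rescaled to fit.
print(approx_output_size((100, 200), size=50, max_size=80))  # (40, 80)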