2024-09-04 nightly release (838ad6c)

pytorch · Sep 4, 2024 · fb60363 · fb60363
1 parent 394ca56
commit fb60363
Show file tree

Hide file tree

Showing 9 changed files with 96 additions and 65 deletions.
diff --git a/docs/source/io.rst b/docs/source/io.rst
@@ -19,7 +19,6 @@ For encoding, JPEG (cpu and CUDA) and PNG are supported.
     :toctree: generated/
     :template: function.rst
 
-    read_image
     decode_image
     encode_jpeg
     decode_jpeg
@@ -38,6 +37,13 @@ For encoding, JPEG (cpu and CUDA) and PNG are supported.
 
     ImageReadMode
 
+Obsolete decoding function:
+
+.. autosummary::
+    :toctree: generated/
+    :template: class.rst
+
+    read_image
 
 
 Video

diff --git a/docs/source/models.rst b/docs/source/models.rst
@@ -226,10 +226,10 @@ Here is an example of how to use the pre-trained image classification models:
 
 .. code:: python
 
-    from torchvision.io import read_image
+    from torchvision.io import decode_image
     from torchvision.models import resnet50, ResNet50_Weights
 
-    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
+    img = decode_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
 
     # Step 1: Initialize model with the best available weights
     weights = ResNet50_Weights.DEFAULT
@@ -283,10 +283,10 @@ Here is an example of how to use the pre-trained quantized image classification
 
 .. code:: python
 
-    from torchvision.io import read_image
+    from torchvision.io import decode_image
     from torchvision.models.quantization import resnet50, ResNet50_QuantizedWeights
 
-    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
+    img = decode_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
 
     # Step 1: Initialize model with the best available weights
     weights = ResNet50_QuantizedWeights.DEFAULT
@@ -339,11 +339,11 @@ Here is an example of how to use the pre-trained semantic segmentation models:
 
 .. code:: python
 
-    from torchvision.io.image import read_image
+    from torchvision.io.image import decode_image
     from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
     from torchvision.transforms.functional import to_pil_image
 
-    img = read_image("gallery/assets/dog1.jpg")
+    img = decode_image("gallery/assets/dog1.jpg")
 
     # Step 1: Initialize model with the best available weights
     weights = FCN_ResNet50_Weights.DEFAULT
@@ -411,12 +411,12 @@ Here is an example of how to use the pre-trained object detection models:
 .. code:: python
 
 
-    from torchvision.io.image import read_image
+    from torchvision.io.image import decode_image
     from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
     from torchvision.utils import draw_bounding_boxes
     from torchvision.transforms.functional import to_pil_image
 
-    img = read_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
+    img = decode_image("test/assets/encode_jpeg/grace_hopper_517x606.jpg")
 
     # Step 1: Initialize model with the best available weights
     weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT

diff --git a/gallery/others/plot_repurposing_annotations.py b/gallery/others/plot_repurposing_annotations.py
@@ -66,12 +66,12 @@ def show(imgs):
 # We will take images and masks from the `PenFudan Dataset <https://www.cis.upenn.edu/~jshi/ped_html/>`_.
 
 
-from torchvision.io import read_image
+from torchvision.io import decode_image
 
 img_path = os.path.join(ASSETS_DIRECTORY, "FudanPed00054.png")
 mask_path = os.path.join(ASSETS_DIRECTORY, "FudanPed00054_mask.png")
-img = read_image(img_path)
-mask = read_image(mask_path)
+img = decode_image(img_path)
+mask = decode_image(mask_path)
 
 
 # %%
@@ -181,8 +181,8 @@ def __getitem__(self, idx):
         img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
         mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
 
-        img = read_image(img_path)
-        mask = read_image(mask_path)
+        img = decode_image(img_path)
+        mask = decode_image(mask_path)
 
         img = F.convert_image_dtype(img, dtype=torch.float)
         mask = F.convert_image_dtype(mask, dtype=torch.float)

diff --git a/gallery/others/plot_scripted_tensor_transforms.py b/gallery/others/plot_scripted_tensor_transforms.py
@@ -21,7 +21,7 @@
 import torch.nn as nn
 
 import torchvision.transforms as v1
-from torchvision.io import read_image
+from torchvision.io import decode_image
 
 plt.rcParams["savefig.bbox"] = 'tight'
 torch.manual_seed(1)
@@ -39,8 +39,8 @@
 # :class:`torch.nn.Sequential` instead of
 # :class:`~torchvision.transforms.v2.Compose`:
 
-dog1 = read_image(str(ASSETS_PATH / 'dog1.jpg'))
-dog2 = read_image(str(ASSETS_PATH / 'dog2.jpg'))
+dog1 = decode_image(str(ASSETS_PATH / 'dog1.jpg'))
+dog2 = decode_image(str(ASSETS_PATH / 'dog2.jpg'))
 
 transforms = torch.nn.Sequential(
     v1.RandomCrop(224),

diff --git a/gallery/others/plot_visualization_utils.py b/gallery/others/plot_visualization_utils.py
@@ -42,11 +42,11 @@ def show(imgs):
 # image of dtype ``uint8`` as input.
 
 from torchvision.utils import make_grid
-from torchvision.io import read_image
+from torchvision.io import decode_image
 from pathlib import Path
 
-dog1_int = read_image(str(Path('../assets') / 'dog1.jpg'))
-dog2_int = read_image(str(Path('../assets') / 'dog2.jpg'))
+dog1_int = decode_image(str(Path('../assets') / 'dog1.jpg'))
+dog2_int = decode_image(str(Path('../assets') / 'dog2.jpg'))
 dog_list = [dog1_int, dog2_int]
 
 grid = make_grid(dog_list)
@@ -362,9 +362,9 @@ def show(imgs):
 #
 
 from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
-from torchvision.io import read_image
+from torchvision.io import decode_image
 
-person_int = read_image(str(Path("../assets") / "person1.jpg"))
+person_int = decode_image(str(Path("../assets") / "person1.jpg"))
 
 weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
 transforms = weights.transforms()

diff --git a/gallery/transforms/plot_transforms_getting_started.py b/gallery/transforms/plot_transforms_getting_started.py
@@ -21,14 +21,14 @@
 plt.rcParams["savefig.bbox"] = 'tight'
 
 from torchvision.transforms import v2
-from torchvision.io import read_image
+from torchvision.io import decode_image
 
 torch.manual_seed(1)
 
 # If you're trying to run that on Colab, you can download the assets and the
 # helpers from https://github.com/pytorch/vision/tree/main/gallery/
 from helpers import plot
-img = read_image(str(Path('../assets') / 'astronaut.jpg'))
+img = decode_image(str(Path('../assets') / 'astronaut.jpg'))
 print(f"{type(img) = }, {img.dtype = }, {img.shape = }")
 
 # %%

diff --git a/test/smoke_test.py b/test/smoke_test.py
@@ -6,7 +6,7 @@
 
 import torch
 import torchvision
-from torchvision.io import decode_jpeg, decode_webp, read_file, read_image
+from torchvision.io import decode_image, decode_jpeg, decode_webp, read_file
 from torchvision.models import resnet50, ResNet50_Weights
 
 
@@ -21,13 +21,13 @@ def smoke_test_torchvision() -> None:
 
 
 def smoke_test_torchvision_read_decode() -> None:
-    img_jpg = read_image(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg"))
+    img_jpg = decode_image(str(SCRIPT_DIR / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg"))
     if img_jpg.shape != (3, 606, 517):
         raise RuntimeError(f"Unexpected shape of img_jpg: {img_jpg.shape}")
-    img_png = read_image(str(SCRIPT_DIR / "assets" / "interlaced_png" / "wizard_low.png"))
+    img_png = decode_image(str(SCRIPT_DIR / "assets" / "interlaced_png" / "wizard_low.png"))
     if img_png.shape != (4, 471, 354):
         raise RuntimeError(f"Unexpected shape of img_png: {img_png.shape}")
-    img_webp = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
+    img_webp = decode_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
     if img_webp.shape != (3, 100, 100):
         raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}")
 
@@ -54,7 +54,7 @@ def smoke_test_compile() -> None:
 
 
 def smoke_test_torchvision_resnet50_classify(device: str = "cpu") -> None:
-    img = read_image(str(SCRIPT_DIR / ".." / "gallery" / "assets" / "dog2.jpg")).to(device)
+    img = decode_image(str(SCRIPT_DIR / ".." / "gallery" / "assets" / "dog2.jpg")).to(device)
 
     # Step 1: Initialize model with the best available weights
     weights = ResNet50_Weights.DEFAULT

diff --git a/test/test_image.py b/test/test_image.py
@@ -1044,5 +1044,37 @@ def test_decode_heic(decode_fun, scripted):
     img += 123  # make sure image buffer wasn't freed by underlying decoding lib
 
 
+@pytest.mark.parametrize("input_type", ("Path", "str", "tensor"))
+@pytest.mark.parametrize("scripted", (False, True))
+def test_decode_image_path(input_type, scripted):
+    # Check that decode_image accepts a pathlib.Path, a str path, or the tensor
+    # returned by read_file. `image_input` avoids shadowing the `input` builtin.
+    path = next(get_images(IMAGE_ROOT, ".jpg"))
+    if input_type == "Path":
+        image_input = Path(path)
+    elif input_type == "str":
+        image_input = path
+    elif input_type == "tensor":
+        image_input = read_file(path)
+    else:
+        raise ValueError("Oops")
+
+    if scripted and input_type == "Path":
+        pytest.xfail(reason="Can't pass a Path when scripting")
+    decode_fun = torch.jit.script(decode_image) if scripted else decode_image
+    decode_fun(image_input)
+
+
+def test_mode_str():
+    # decode_image must accept the decoding mode as a string, including a
+    # mixed-case spelling such as "rGb". Only decode_image is exercised; the
+    # other decoding functions should support string modes as well. Torchscript
+    # fails when passing strings, which is expected.
+    path = next(get_images(IMAGE_ROOT, ".png"))
+    expected_channels = (("RGB", 3), ("rGb", 3), ("GRAY", 1), ("RGBA", 4))
+    for mode, num_channels in expected_channels:
+        assert decode_image(path, mode=mode).shape[0] == num_channels
+
+
 if __name__ == "__main__":
     pytest.main([__file__])