|
import math |
|
import numbers |
|
import sys |
|
import warnings |
|
from enum import Enum |
|
from typing import Any, List, Optional, Tuple, Union |
|
|
|
import numpy as np |
|
import torch |
|
from PIL import Image |
|
from PIL.Image import Image as PILImage |
|
from torch import Tensor |
|
|
|
try: |
|
import accimage |
|
except ImportError: |
|
accimage = None |
|
|
|
from ..utils import _log_api_usage_once |
|
from . import _functional_pil as F_pil, _functional_tensor as F_t |
|
|
|
|
|
class InterpolationMode(Enum): |
|
"""Interpolation modes |
|
Available interpolation methods are ``nearest``, ``nearest-exact``, ``bilinear``, ``bicubic``, ``box``, ``hamming``, |
|
and ``lanczos``. |
|
""" |
|
|
|
NEAREST = "nearest" |
|
NEAREST_EXACT = "nearest-exact" |
|
BILINEAR = "bilinear" |
|
BICUBIC = "bicubic" |
|
|
|
BOX = "box" |
|
HAMMING = "hamming" |
|
LANCZOS = "lanczos" |
|
|
|
|
|
|
|
|
|
def _interpolation_modes_from_int(i: int) -> InterpolationMode: |
|
inverse_modes_mapping = { |
|
0: InterpolationMode.NEAREST, |
|
2: InterpolationMode.BILINEAR, |
|
3: InterpolationMode.BICUBIC, |
|
4: InterpolationMode.BOX, |
|
5: InterpolationMode.HAMMING, |
|
1: InterpolationMode.LANCZOS, |
|
} |
|
return inverse_modes_mapping[i] |
|
|
|
|
|
pil_modes_mapping = { |
|
InterpolationMode.NEAREST: 0, |
|
InterpolationMode.BILINEAR: 2, |
|
InterpolationMode.BICUBIC: 3, |
|
InterpolationMode.NEAREST_EXACT: 0, |
|
InterpolationMode.BOX: 4, |
|
InterpolationMode.HAMMING: 5, |
|
InterpolationMode.LANCZOS: 1, |
|
} |
|
|
|
_is_pil_image = F_pil._is_pil_image |
|
|
|
|
|
def get_dimensions(img: Tensor) -> List[int]: |
|
"""Returns the dimensions of an image as [channels, height, width]. |
|
|
|
Args: |
|
img (PIL Image or Tensor): The image to be checked. |
|
|
|
Returns: |
|
List[int]: The image dimensions. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(get_dimensions) |
|
if isinstance(img, torch.Tensor): |
|
return F_t.get_dimensions(img) |
|
|
|
return F_pil.get_dimensions(img) |
|
|
|
|
|
def get_image_size(img: Tensor) -> List[int]: |
|
"""Returns the size of an image as [width, height]. |
|
|
|
Args: |
|
img (PIL Image or Tensor): The image to be checked. |
|
|
|
Returns: |
|
List[int]: The image size. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(get_image_size) |
|
if isinstance(img, torch.Tensor): |
|
return F_t.get_image_size(img) |
|
|
|
return F_pil.get_image_size(img) |
|
|
|
|
|
def get_image_num_channels(img: Tensor) -> int: |
|
"""Returns the number of channels of an image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): The image to be checked. |
|
|
|
Returns: |
|
int: The number of channels. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(get_image_num_channels) |
|
if isinstance(img, torch.Tensor): |
|
return F_t.get_image_num_channels(img) |
|
|
|
return F_pil.get_image_num_channels(img) |
|
|
|
|
|
@torch.jit.unused |
|
def _is_numpy(img: Any) -> bool: |
|
return isinstance(img, np.ndarray) |
|
|
|
|
|
@torch.jit.unused |
|
def _is_numpy_image(img: Any) -> bool: |
|
return img.ndim in {2, 3} |
|
|
|
|
|
def to_tensor(pic: Union[PILImage, np.ndarray]) -> Tensor: |
|
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor. |
|
This function does not support torchscript. |
|
|
|
See :class:`~torchvision.transforms.ToTensor` for more details. |
|
|
|
Args: |
|
pic (PIL Image or numpy.ndarray): Image to be converted to tensor. |
|
|
|
Returns: |
|
Tensor: Converted image. |
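    Example (a minimal sketch; assumes a ``uint8`` HWC array as input)::

        >>> import numpy as np
        >>> arr = np.zeros((32, 32, 3), dtype=np.uint8)   # H x W x C, values in [0, 255]
        >>> t = to_tensor(arr)                            # shape (3, 32, 32), torch.float32, values in [0.0, 1.0]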
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(to_tensor) |
|
if not (F_pil._is_pil_image(pic) or _is_numpy(pic)): |
|
raise TypeError(f"pic should be PIL Image or ndarray. Got {type(pic)}") |
|
|
|
if _is_numpy(pic) and not _is_numpy_image(pic): |
|
raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.") |
|
|
|
default_float_dtype = torch.get_default_dtype() |
|
|
|
if isinstance(pic, np.ndarray): |
|
|
|
if pic.ndim == 2: |
|
pic = pic[:, :, None] |
|
|
|
img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous() |
|
|
|
if isinstance(img, torch.ByteTensor): |
|
return img.to(dtype=default_float_dtype).div(255) |
|
else: |
|
return img |
|
|
|
if accimage is not None and isinstance(pic, accimage.Image): |
|
nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32) |
|
pic.copyto(nppic) |
|
return torch.from_numpy(nppic).to(dtype=default_float_dtype) |
|
|
|
|
|
mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.int16, "F": np.float32} |
|
img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True)) |
|
|
|
if pic.mode == "1": |
|
img = 255 * img |
|
img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)) |
|
|
|
img = img.permute((2, 0, 1)).contiguous() |
|
if isinstance(img, torch.ByteTensor): |
|
return img.to(dtype=default_float_dtype).div(255) |
|
else: |
|
return img |
|
|
|
|
|
def pil_to_tensor(pic: Any) -> Tensor: |
|
"""Convert a ``PIL Image`` to a tensor of the same type. |
|
This function does not support torchscript. |
|
|
|
See :class:`~torchvision.transforms.PILToTensor` for more details. |
|
|
|
.. note:: |
|
|
|
A deep copy of the underlying array is performed. |
|
|
|
Args: |
|
pic (PIL Image): Image to be converted to tensor. |
|
|
|
Returns: |
|
Tensor: Converted image. |
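    Example (illustrative sketch; note that, unlike ``to_tensor``, values are not rescaled)::

        >>> from PIL import Image
        >>> pil_img = Image.new("RGB", (16, 16))
        >>> t = pil_to_tensor(pil_img)   # shape (3, 16, 16), dtype torch.uint8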
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(pil_to_tensor) |
|
if not F_pil._is_pil_image(pic): |
|
raise TypeError(f"pic should be PIL Image. Got {type(pic)}") |
|
|
|
if accimage is not None and isinstance(pic, accimage.Image): |
|
|
|
nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.uint8) |
|
pic.copyto(nppic) |
|
return torch.as_tensor(nppic) |
|
|
|
|
|
img = torch.as_tensor(np.array(pic, copy=True)) |
|
img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)) |
|
|
|
img = img.permute((2, 0, 1)) |
|
return img |
|
|
|
|
|
def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor: |
|
"""Convert a tensor image to the given ``dtype`` and scale the values accordingly |
|
This function does not support PIL Image. |
|
|
|
Args: |
|
image (torch.Tensor): Image to be converted |
|
dtype (torch.dtype): Desired data type of the output |
|
|
|
Returns: |
|
Tensor: Converted image |
|
|
|
.. note:: |
|
|
|
When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly. |
|
If converted back and forth, this mismatch has no effect. |
|
|
|
Raises: |
|
RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as |
|
well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to |
|
overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range |
|
of the integer ``dtype``. |
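    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.randint(0, 256, (3, 8, 8), dtype=torch.uint8)
        >>> out = convert_image_dtype(img, torch.float32)   # values rescaled from [0, 255] to [0.0, 1.0]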
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(convert_image_dtype) |
|
if not isinstance(image, torch.Tensor): |
|
raise TypeError("Input img should be Tensor Image") |
|
|
|
return F_t.convert_image_dtype(image, dtype) |
|
|
|
|
|
def to_pil_image(pic, mode=None): |
|
"""Convert a tensor or an ndarray to PIL Image. This function does not support torchscript. |
|
|
|
See :class:`~torchvision.transforms.ToPILImage` for more details. |
|
|
|
Args: |
|
pic (Tensor or numpy.ndarray): Image to be converted to PIL Image. |
|
mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). |
|
|
|
.. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes |
|
|
|
Returns: |
|
PIL Image: Image converted to PIL Image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(to_pil_image) |
|
|
|
if isinstance(pic, torch.Tensor): |
|
if pic.ndim == 3: |
|
pic = pic.permute((1, 2, 0)) |
|
pic = pic.numpy(force=True) |
|
elif not isinstance(pic, np.ndarray): |
|
raise TypeError(f"pic should be Tensor or ndarray. Got {type(pic)}.") |
|
|
|
if pic.ndim == 2: |
|
|
|
pic = np.expand_dims(pic, 2) |
|
if pic.ndim != 3: |
|
raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.") |
|
|
|
if pic.shape[-1] > 4: |
|
raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-1]} channels.") |
|
|
|
npimg = pic |
|
|
|
if np.issubdtype(npimg.dtype, np.floating) and mode != "F": |
|
npimg = (npimg * 255).astype(np.uint8) |
|
|
|
if npimg.shape[2] == 1: |
|
expected_mode = None |
|
npimg = npimg[:, :, 0] |
|
if npimg.dtype == np.uint8: |
|
expected_mode = "L" |
|
elif npimg.dtype == np.int16: |
|
expected_mode = "I;16" if sys.byteorder == "little" else "I;16B" |
|
elif npimg.dtype == np.int32: |
|
expected_mode = "I" |
|
elif npimg.dtype == np.float32: |
|
expected_mode = "F" |
|
if mode is not None and mode != expected_mode: |
|
raise ValueError(f"Incorrect mode ({mode}) supplied for input type {np.dtype}. Should be {expected_mode}") |
|
mode = expected_mode |
|
|
|
elif npimg.shape[2] == 2: |
|
permitted_2_channel_modes = ["LA"] |
|
if mode is not None and mode not in permitted_2_channel_modes: |
|
raise ValueError(f"Only modes {permitted_2_channel_modes} are supported for 2D inputs") |
|
|
|
if mode is None and npimg.dtype == np.uint8: |
|
mode = "LA" |
|
|
|
elif npimg.shape[2] == 4: |
|
permitted_4_channel_modes = ["RGBA", "CMYK", "RGBX"] |
|
if mode is not None and mode not in permitted_4_channel_modes: |
|
raise ValueError(f"Only modes {permitted_4_channel_modes} are supported for 4D inputs") |
|
|
|
if mode is None and npimg.dtype == np.uint8: |
|
mode = "RGBA" |
|
else: |
|
permitted_3_channel_modes = ["RGB", "YCbCr", "HSV"] |
|
if mode is not None and mode not in permitted_3_channel_modes: |
|
raise ValueError(f"Only modes {permitted_3_channel_modes} are supported for 3D inputs") |
|
if mode is None and npimg.dtype == np.uint8: |
|
mode = "RGB" |
|
|
|
if mode is None: |
|
raise TypeError(f"Input type {npimg.dtype} is not supported") |
|
|
|
return Image.fromarray(npimg, mode=mode) |
|
|
|
|
|
def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor: |
|
"""Normalize a float tensor image with mean and standard deviation. |
|
This transform does not support PIL Image. |
|
|
|
.. note:: |
|
This transform acts out of place by default, i.e., it does not mutate the input tensor.
|
|
|
See :class:`~torchvision.transforms.Normalize` for more details. |
|
|
|
Args: |
|
tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized. |
|
mean (sequence): Sequence of means for each channel. |
|
std (sequence): Sequence of standard deviations for each channel. |
|
inplace(bool,optional): Bool to make this operation inplace. |
|
|
|
Returns: |
|
Tensor: Normalized Tensor image. |
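    Example (a minimal sketch; the mean/std below are the usual ImageNet statistics, not values required by this function)::

        >>> import torch
        >>> img = torch.rand(3, 224, 224)
        >>> out = normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])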
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(normalize) |
|
if not isinstance(tensor, torch.Tensor): |
|
raise TypeError(f"img should be Tensor Image. Got {type(tensor)}") |
|
|
|
return F_t.normalize(tensor, mean=mean, std=std, inplace=inplace) |
|
|
|
|
|
def _compute_resized_output_size( |
|
image_size: Tuple[int, int], |
|
size: Optional[List[int]], |
|
max_size: Optional[int] = None, |
|
allow_size_none: bool = False, |
|
) -> List[int]: |
|
h, w = image_size |
|
short, long = (w, h) if w <= h else (h, w) |
|
if size is None: |
|
if not allow_size_none: |
|
raise ValueError("This should never happen!!") |
|
if not isinstance(max_size, int): |
|
raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") |
|
new_short, new_long = int(max_size * short / long), max_size |
|
new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) |
|
elif len(size) == 1: |
|
requested_new_short = size if isinstance(size, int) else size[0] |
|
new_short, new_long = requested_new_short, int(requested_new_short * long / short) |
|
|
|
if max_size is not None: |
|
if max_size <= requested_new_short: |
|
raise ValueError( |
|
f"max_size = {max_size} must be strictly greater than the requested " |
|
f"size for the smaller edge size = {size}" |
|
) |
|
if new_long > max_size: |
|
new_short, new_long = int(max_size * new_short / new_long), max_size |
|
|
|
new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) |
|
else: |
|
new_w, new_h = size[1], size[0] |
|
return [new_h, new_w] |
|
|
|
|
|
def resize( |
|
img: Tensor, |
|
size: List[int], |
|
interpolation: InterpolationMode = InterpolationMode.BILINEAR, |
|
max_size: Optional[int] = None, |
|
antialias: Optional[bool] = True, |
|
) -> Tensor: |
|
r"""Resize the input image to the given size. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be resized. |
|
size (sequence or int): Desired output size. If size is a sequence like |
|
(h, w), the output size will be matched to this. If size is an int, |
|
the smaller edge of the image will be matched to this number maintaining |
|
the aspect ratio, i.e., if height > width, then the image will be rescaled to
|
:math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. |
|
|
|
.. note:: |
|
In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. |
|
interpolation (InterpolationMode): Desired interpolation enum defined by |
|
:class:`torchvision.transforms.InterpolationMode`. |
|
Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, |
|
``InterpolationMode.NEAREST_EXACT``, ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are |
|
supported. |
|
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. |
|
max_size (int, optional): The maximum allowed for the longer edge of |
|
the resized image. If the longer edge of the image is greater |
|
than ``max_size`` after being resized according to ``size``, |
|
``size`` will be overruled so that the longer edge is equal to |
|
``max_size``. |
|
As a result, the smaller edge may be shorter than ``size``. This |
|
is only supported if ``size`` is an int (or a sequence of length |
|
1 in torchscript mode). |
|
antialias (bool, optional): Whether to apply antialiasing. |
|
It only affects **tensors** with bilinear or bicubic modes and it is |
|
ignored otherwise: on PIL images, antialiasing is always applied on |
|
bilinear or bicubic modes; on other modes (for PIL images and |
|
tensors), antialiasing makes no sense and this parameter is ignored. |
|
Possible values are: |
|
|
|
- ``True`` (default): will apply antialiasing for bilinear or bicubic modes. |
|
Other modes aren't affected. This is probably what you want to use.
|
- ``False``: will not apply antialiasing for tensors on any mode. PIL |
|
images are still antialiased on bilinear or bicubic modes, because |
|
PIL does not support disabling antialiasing.
|
- ``None``: equivalent to ``False`` for tensors and ``True`` for |
|
PIL images. This value exists for legacy reasons and you probably |
|
don't want to use it unless you really know what you are doing. |
|
|
|
The default value changed from ``None`` to ``True`` in |
|
v0.17, for the PIL and Tensor backends to be consistent. |
|
|
|
Returns: |
|
PIL Image or Tensor: Resized image. |
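    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.rand(3, 300, 500)
        >>> out = resize(img, [256])                 # smaller edge becomes 256, aspect ratio preserved
        >>> out = resize(img, [224, 224])            # exact (h, w) output size
        >>> out = resize(img, [256], max_size=384)   # longer edge capped at 384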
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(resize) |
|
|
|
if isinstance(interpolation, int): |
|
interpolation = _interpolation_modes_from_int(interpolation) |
|
elif not isinstance(interpolation, InterpolationMode): |
|
raise TypeError( |
|
"Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant" |
|
) |
|
|
|
if isinstance(size, (list, tuple)): |
|
if len(size) not in [1, 2]: |
|
raise ValueError( |
|
f"Size must be an int or a 1 or 2 element tuple/list, not a {len(size)} element tuple/list" |
|
) |
|
if max_size is not None and len(size) != 1: |
|
raise ValueError( |
|
"max_size should only be passed if size specifies the length of the smaller edge, " |
|
"i.e. size should be an int or a sequence of length 1 in torchscript mode." |
|
) |
|
|
|
_, image_height, image_width = get_dimensions(img) |
|
if isinstance(size, int): |
|
size = [size] |
|
output_size = _compute_resized_output_size((image_height, image_width), size, max_size) |
|
|
|
if [image_height, image_width] == output_size: |
|
return img |
|
|
|
if not isinstance(img, torch.Tensor): |
|
if antialias is False: |
|
warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.") |
|
pil_interpolation = pil_modes_mapping[interpolation] |
|
return F_pil.resize(img, size=output_size, interpolation=pil_interpolation) |
|
|
|
return F_t.resize(img, size=output_size, interpolation=interpolation.value, antialias=antialias) |
|
|
|
|
|
def pad(img: Tensor, padding: List[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor: |
|
r"""Pad the given image on all sides with the given "pad" value. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric, |
|
at most 3 leading dimensions for mode edge, |
|
and an arbitrary number of leading dimensions for mode constant |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be padded. |
|
padding (int or sequence): Padding on each border. If a single int is provided this |
|
is used to pad all borders. If sequence of length 2 is provided this is the padding |
|
on left/right and top/bottom respectively. If a sequence of length 4 is provided |
|
this is the padding for the left, top, right and bottom borders respectively. |
|
|
|
.. note:: |
|
In torchscript mode padding as single int is not supported, use a sequence of |
|
length 1: ``[padding, ]``. |
|
fill (number or tuple): Pixel fill value for constant fill. Default is 0. |
|
If a tuple of length 3, it is used to fill R, G, B channels respectively. |
|
This value is only used when the padding_mode is constant. |
|
Only number is supported for torch Tensor. |
|
Only int or tuple value is supported for PIL Image. |
|
padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. |
|
Default is constant. |
|
|
|
- constant: pads with a constant value; this value is specified with fill
|
|
|
- edge: pads with the last value at the edge of the image. |
|
If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2 |
|
|
|
- reflect: pads with reflection of image without repeating the last value on the edge. |
|
For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode |
|
will result in [3, 2, 1, 2, 3, 4, 3, 2] |
|
|
|
- symmetric: pads with reflection of image repeating the last value on the edge. |
|
For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode |
|
will result in [2, 1, 1, 2, 3, 4, 4, 3] |
|
|
|
Returns: |
|
PIL Image or Tensor: Padded image. |
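    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.rand(3, 10, 10)
        >>> out = pad(img, [1, 2])                                 # 1 px left/right, 2 px top/bottom -> (3, 14, 12)
        >>> out = pad(img, [1, 2, 3, 4], padding_mode="reflect")   # left, top, right, bottom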
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(pad) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) |
|
|
|
return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) |
|
|
|
|
|
def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: |
|
"""Crop the given image at specified location and output size. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. |
|
If image size is smaller than output size along any edge, image is padded with 0 and then cropped. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. |
|
top (int): Vertical component of the top left corner of the crop box. |
|
left (int): Horizontal component of the top left corner of the crop box. |
|
height (int): Height of the crop box. |
|
width (int): Width of the crop box. |
|
|
|
Returns: |
|
PIL Image or Tensor: Cropped image. |
|
""" |
|
|
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(crop) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.crop(img, top, left, height, width) |
|
|
|
return F_t.crop(img, top, left, height, width) |
|
|
|
|
|
def center_crop(img: Tensor, output_size: List[int]) -> Tensor: |
|
"""Crops the given image at the center. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. |
|
If image size is smaller than output size along any edge, image is padded with 0 and then center cropped. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be cropped. |
|
output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int, |
|
it is used for both directions. |
|
|
|
Returns: |
|
PIL Image or Tensor: Cropped image. |
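    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.rand(3, 256, 256)
        >>> out = center_crop(img, [224, 224])   # -> (3, 224, 224)
        >>> out = center_crop(img, 300)          # crop larger than image: zero-padded first -> (3, 300, 300)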
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(center_crop) |
|
if isinstance(output_size, numbers.Number): |
|
output_size = (int(output_size), int(output_size)) |
|
elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: |
|
output_size = (output_size[0], output_size[0]) |
|
|
|
_, image_height, image_width = get_dimensions(img) |
|
crop_height, crop_width = output_size |
|
|
|
if crop_width > image_width or crop_height > image_height: |
|
padding_ltrb = [ |
|
(crop_width - image_width) // 2 if crop_width > image_width else 0, |
|
(crop_height - image_height) // 2 if crop_height > image_height else 0, |
|
(crop_width - image_width + 1) // 2 if crop_width > image_width else 0, |
|
(crop_height - image_height + 1) // 2 if crop_height > image_height else 0, |
|
] |
|
img = pad(img, padding_ltrb, fill=0) |
|
_, image_height, image_width = get_dimensions(img) |
|
if crop_width == image_width and crop_height == image_height: |
|
return img |
|
|
|
crop_top = int(round((image_height - crop_height) / 2.0)) |
|
crop_left = int(round((image_width - crop_width) / 2.0)) |
|
return crop(img, crop_top, crop_left, crop_height, crop_width) |
|
|
|
|
|
def resized_crop( |
|
img: Tensor, |
|
top: int, |
|
left: int, |
|
height: int, |
|
width: int, |
|
size: List[int], |
|
interpolation: InterpolationMode = InterpolationMode.BILINEAR, |
|
antialias: Optional[bool] = True, |
|
) -> Tensor: |
|
"""Crop the given image and resize it to desired size. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions |
|
|
|
Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. |
|
top (int): Vertical component of the top left corner of the crop box. |
|
left (int): Horizontal component of the top left corner of the crop box. |
|
height (int): Height of the crop box. |
|
width (int): Width of the crop box. |
|
size (sequence or int): Desired output size. Same semantics as ``resize``. |
|
interpolation (InterpolationMode): Desired interpolation enum defined by |
|
:class:`torchvision.transforms.InterpolationMode`. |
|
Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``, |
|
``InterpolationMode.NEAREST_EXACT``, ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are |
|
supported. |
|
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. |
|
antialias (bool, optional): Whether to apply antialiasing. |
|
It only affects **tensors** with bilinear or bicubic modes and it is |
|
ignored otherwise: on PIL images, antialiasing is always applied on |
|
bilinear or bicubic modes; on other modes (for PIL images and |
|
tensors), antialiasing makes no sense and this parameter is ignored. |
|
Possible values are: |
|
|
|
- ``True`` (default): will apply antialiasing for bilinear or bicubic modes. |
|
Other modes aren't affected. This is probably what you want to use.
|
- ``False``: will not apply antialiasing for tensors on any mode. PIL |
|
images are still antialiased on bilinear or bicubic modes, because |
|
PIL does not support disabling antialiasing.
|
- ``None``: equivalent to ``False`` for tensors and ``True`` for |
|
PIL images. This value exists for legacy reasons and you probably |
|
don't want to use it unless you really know what you are doing. |
|
|
|
The default value changed from ``None`` to ``True`` in |
|
v0.17, for the PIL and Tensor backends to be consistent. |
|
Returns: |
|
PIL Image or Tensor: Cropped image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(resized_crop) |
|
img = crop(img, top, left, height, width) |
|
img = resize(img, size, interpolation, antialias=antialias) |
|
return img |
|
|
|
|
|
def hflip(img: Tensor) -> Tensor: |
|
"""Horizontally flip the given image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be flipped. If img |
|
is a Tensor, it is expected to be in [..., H, W] format, |
|
where ... means it can have an arbitrary number of leading |
|
dimensions. |
|
|
|
Returns: |
|
PIL Image or Tensor: Horizontally flipped image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(hflip) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.hflip(img) |
|
|
|
return F_t.hflip(img) |
|
|
|
|
|
def _get_perspective_coeffs(startpoints: List[List[int]], endpoints: List[List[int]]) -> List[float]: |
|
"""Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms. |
|
|
|
In Perspective Transform each pixel (x, y) in the original image gets transformed as, |
|
(x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) ) |
|
|
|
Args: |
|
startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners |
|
``[top-left, top-right, bottom-right, bottom-left]`` of the original image. |
|
endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners |
|
``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image. |
|
|
|
Returns: |
|
octuple (a, b, c, d, e, f, g, h) for transforming each pixel. |
|
""" |
|
if len(startpoints) != 4 or len(endpoints) != 4: |
|
raise ValueError( |
|
f"Please provide exactly four corners, got {len(startpoints)} startpoints and {len(endpoints)} endpoints." |
|
) |
|
a_matrix = torch.zeros(2 * len(startpoints), 8, dtype=torch.float64) |
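# Each of the four point correspondences contributes two rows (one for x, one for y) to an
# 8x8 linear system; solving it in the least-squares sense yields the coefficients that map
# endpoints back onto startpoints, i.e. the inverse (output-to-input) mapping used for sampling.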
|
|
|
for i, (p1, p2) in enumerate(zip(endpoints, startpoints)): |
|
a_matrix[2 * i, :] = torch.tensor([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]]) |
|
a_matrix[2 * i + 1, :] = torch.tensor([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]]) |
|
|
|
b_matrix = torch.tensor(startpoints, dtype=torch.float64).view(8) |
|
|
|
res = torch.linalg.lstsq(a_matrix, b_matrix, driver="gels").solution.to(torch.float32) |
|
|
|
output: List[float] = res.tolist() |
|
return output |
|
|
|
|
|
def perspective( |
|
img: Tensor, |
|
startpoints: List[List[int]], |
|
endpoints: List[List[int]], |
|
interpolation: InterpolationMode = InterpolationMode.BILINEAR, |
|
fill: Optional[List[float]] = None, |
|
) -> Tensor: |
|
"""Perform perspective transform of the given image. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be transformed. |
|
startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners |
|
``[top-left, top-right, bottom-right, bottom-left]`` of the original image. |
|
endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners |
|
``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image. |
|
interpolation (InterpolationMode): Desired interpolation enum defined by |
|
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``. |
|
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. |
|
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. |
|
fill (sequence or number, optional): Pixel fill value for the area outside the transformed |
|
image. If given a number, the value is used for all bands.
|
|
|
.. note:: |
|
In torchscript mode single int/float value is not supported, please use a sequence |
|
of length 1: ``[value, ]``. |
|
|
|
Returns: |
|
PIL Image or Tensor: transformed Image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(perspective) |
|
|
|
coeffs = _get_perspective_coeffs(startpoints, endpoints) |
|
|
|
if isinstance(interpolation, int): |
|
interpolation = _interpolation_modes_from_int(interpolation) |
|
elif not isinstance(interpolation, InterpolationMode): |
|
raise TypeError( |
|
"Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant" |
|
) |
|
|
|
if not isinstance(img, torch.Tensor): |
|
pil_interpolation = pil_modes_mapping[interpolation] |
|
return F_pil.perspective(img, coeffs, interpolation=pil_interpolation, fill=fill) |
|
|
|
return F_t.perspective(img, coeffs, interpolation=interpolation.value, fill=fill) |
|
|
|
|
|
def vflip(img: Tensor) -> Tensor: |
|
"""Vertically flip the given image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be flipped. If img |
|
is a Tensor, it is expected to be in [..., H, W] format, |
|
where ... means it can have an arbitrary number of leading |
|
dimensions. |
|
|
|
Returns: |
|
PIL Image or Tensor: Vertically flipped image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(vflip) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.vflip(img) |
|
|
|
return F_t.vflip(img) |
|
|
|
|
|
def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: |
|
"""Crop the given image into four corners and the central crop. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions |
|
|
|
.. Note:: |
|
This transform returns a tuple of images and there may be a |
|
mismatch in the number of inputs and targets your ``Dataset`` returns. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be cropped. |
|
size (sequence or int): Desired output size of the crop. If size is an |
|
int instead of sequence like (h, w), a square crop (size, size) is |
|
made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). |
|
|
|
Returns: |
|
tuple: tuple (tl, tr, bl, br, center) |
|
Corresponding top left, top right, bottom left, bottom right and center crop. |
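    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.rand(3, 256, 256)
        >>> tl, tr, bl, br, center = five_crop(img, [224, 224])   # each crop has shape (3, 224, 224)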
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(five_crop) |
|
if isinstance(size, numbers.Number): |
|
size = (int(size), int(size)) |
|
elif isinstance(size, (tuple, list)) and len(size) == 1: |
|
size = (size[0], size[0]) |
|
|
|
if len(size) != 2: |
|
raise ValueError("Please provide only two dimensions (h, w) for size.") |
|
|
|
_, image_height, image_width = get_dimensions(img) |
|
crop_height, crop_width = size |
|
if crop_width > image_width or crop_height > image_height: |
|
msg = "Requested crop size {} is bigger than input size {}" |
|
raise ValueError(msg.format(size, (image_height, image_width))) |
|
|
|
tl = crop(img, 0, 0, crop_height, crop_width) |
|
tr = crop(img, 0, image_width - crop_width, crop_height, crop_width) |
|
bl = crop(img, image_height - crop_height, 0, crop_height, crop_width) |
|
br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width) |
|
|
|
center = center_crop(img, [crop_height, crop_width]) |
|
|
|
return tl, tr, bl, br, center |
|
|
|
|
|
def ten_crop( |
|
img: Tensor, size: List[int], vertical_flip: bool = False |
|
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: |
|
"""Generate ten cropped images from the given image. |
|
Crop the given image into four corners and the central crop plus the |
|
flipped version of these (horizontal flipping is used by default). |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions |
|
|
|
.. Note:: |
|
This transform returns a tuple of images and there may be a |
|
mismatch in the number of inputs and targets your ``Dataset`` returns. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be cropped. |
|
size (sequence or int): Desired output size of the crop. If size is an |
|
int instead of sequence like (h, w), a square crop (size, size) is |
|
made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]). |
|
vertical_flip (bool): Use vertical flipping instead of horizontal |
|
|
|
Returns: |
|
tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) |
|
Corresponding top left, top right, bottom left, bottom right and |
|
center crop and same for the flipped image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(ten_crop) |
|
if isinstance(size, numbers.Number): |
|
size = (int(size), int(size)) |
|
elif isinstance(size, (tuple, list)) and len(size) == 1: |
|
size = (size[0], size[0]) |
|
|
|
if len(size) != 2: |
|
raise ValueError("Please provide only two dimensions (h, w) for size.") |
|
|
|
first_five = five_crop(img, size) |
|
|
|
if vertical_flip: |
|
img = vflip(img) |
|
else: |
|
img = hflip(img) |
|
|
|
second_five = five_crop(img, size) |
|
return first_five + second_five |
|
|
|
|
|
def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: |
|
"""Adjust brightness of an image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be adjusted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
brightness_factor (float): How much to adjust the brightness. Can be |
|
any non-negative number. 0 gives a black image, 1 gives the |
|
original image while 2 increases the brightness by a factor of 2. |
|
|
|
Returns: |
|
PIL Image or Tensor: Brightness adjusted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(adjust_brightness) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.adjust_brightness(img, brightness_factor) |
|
|
|
return F_t.adjust_brightness(img, brightness_factor) |
|
|
|
|
|
def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: |
|
"""Adjust contrast of an image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be adjusted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
contrast_factor (float): How much to adjust the contrast. Can be any |
|
non-negative number. 0 gives a solid gray image, 1 gives the |
|
original image while 2 increases the contrast by a factor of 2. |
|
|
|
Returns: |
|
PIL Image or Tensor: Contrast adjusted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(adjust_contrast) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.adjust_contrast(img, contrast_factor) |
|
|
|
return F_t.adjust_contrast(img, contrast_factor) |
|
|
|
|
|
def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: |
|
"""Adjust color saturation of an image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be adjusted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
saturation_factor (float): How much to adjust the saturation. 0 will |
|
give a black and white image, 1 will give the original image while |
|
2 will enhance the saturation by a factor of 2. |
|
|
|
Returns: |
|
PIL Image or Tensor: Saturation adjusted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(adjust_saturation) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.adjust_saturation(img, saturation_factor) |
|
|
|
return F_t.adjust_saturation(img, saturation_factor) |
|
|
|
|
|
def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: |
|
"""Adjust hue of an image. |
|
|
|
The image hue is adjusted by converting the image to HSV and |
|
cyclically shifting the intensities in the hue channel (H). |
|
The image is then converted back to original image mode. |
|
|
|
`hue_factor` is the amount of shift in H channel and must be in the |
|
interval `[-0.5, 0.5]`. |
|
|
|
See `Hue`_ for more details. |
|
|
|
.. _Hue: https://en.wikipedia.org/wiki/Hue |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be adjusted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, modes "1", "I", "F" and modes with transparency (alpha channel) are not supported.
|
Note: the pixel values of the input image have to be non-negative for conversion to HSV space;
|
thus it does not work if you normalize your image to an interval with negative values, |
|
or use an interpolation that generates negative values before using this function. |
|
hue_factor (float): How much to shift the hue channel. Should be in |
|
[-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in |
|
HSV space in positive and negative direction respectively. |
|
0 means no shift. Therefore, both -0.5 and 0.5 will give an image |
|
with complementary colors while 0 gives the original image. |
|
|
|
Returns: |
|
PIL Image or Tensor: Hue adjusted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(adjust_hue) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.adjust_hue(img, hue_factor) |
|
|
|
return F_t.adjust_hue(img, hue_factor) |
|
|
|
|
|
def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: |
|
r"""Perform gamma correction on an image. |
|
|
|
Also known as Power Law Transform. Intensities in RGB mode are adjusted |
|
based on the following equation: |
|
|
|
.. math:: |
|
I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} |
|
|
|
See `Gamma Correction`_ for more details. |
|
|
|
.. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction |
|
|
|
Args: |
|
img (PIL Image or Tensor): PIL Image to be adjusted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, modes with transparency (alpha channel) are not supported. |
|
gamma (float): Non negative real number, same as :math:`\gamma` in the equation. |
|
gamma larger than 1 makes the shadows darker,
|
while gamma smaller than 1 makes dark regions lighter.
|
gain (float): The constant multiplier. |
|
Returns: |
|
PIL Image or Tensor: Gamma correction adjusted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(adjust_gamma) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.adjust_gamma(img, gamma, gain) |
|
|
|
return F_t.adjust_gamma(img, gamma, gain) |
|
|
|
|
|
def _get_inverse_affine_matrix( |
|
center: List[float], angle: float, translate: List[float], scale: float, shear: List[float], inverted: bool = True |
|
) -> List[float]: |
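# Computes the first two rows of the 3x3 affine matrix, flattened as [m00, m01, m02, m10, m11, m12].
# The forward transform is M = T * C * RotateScaleShear * C^-1, where T applies the post-rotation
# translation (tx, ty), C moves the origin to the rotation center (cx, cy), and RotateScaleShear
# combines rotation, scaling and the x/y shears.
# With inverted=True (the default) the returned coefficients describe
# M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1, i.e. the output-to-input mapping expected by
# PIL's Image.transform and by the tensor grid-sampling kernels.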
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rot = math.radians(angle) |
|
sx = math.radians(shear[0]) |
|
sy = math.radians(shear[1]) |
|
|
|
cx, cy = center |
|
tx, ty = translate |
|
|
|
|
|
a = math.cos(rot - sy) / math.cos(sy) |
|
b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) |
|
c = math.sin(rot - sy) / math.cos(sy) |
|
d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) |
|
|
|
if inverted: |
|
|
|
|
|
matrix = [d, -b, 0.0, -c, a, 0.0] |
|
matrix = [x / scale for x in matrix] |
|
|
|
matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) |
|
matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) |
|
|
|
matrix[2] += cx |
|
matrix[5] += cy |
|
else: |
|
matrix = [a, b, 0.0, c, d, 0.0] |
|
matrix = [x * scale for x in matrix] |
|
|
|
matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy) |
|
matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy) |
|
|
|
matrix[2] += cx + tx |
|
matrix[5] += cy + ty |
|
|
|
return matrix |
|
|
|
|
|
def rotate( |
|
img: Tensor, |
|
angle: float, |
|
interpolation: InterpolationMode = InterpolationMode.NEAREST, |
|
expand: bool = False, |
|
center: Optional[List[int]] = None, |
|
fill: Optional[List[float]] = None, |
|
) -> Tensor: |
|
"""Rotate the image by angle. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. |
|
|
|
Args: |
|
img (PIL Image or Tensor): image to be rotated. |
|
angle (number): rotation angle value in degrees, counter-clockwise. |
|
interpolation (InterpolationMode): Desired interpolation enum defined by |
|
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. |
|
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. |
|
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. |
|
expand (bool, optional): Optional expansion flag. |
|
If true, expands the output image to make it large enough to hold the entire rotated image. |
|
If false or omitted, makes the output image the same size as the input image.
|
Note that the expand flag assumes rotation around the center and no translation. |
|
center (sequence, optional): Optional center of rotation. Origin is the upper left corner. |
|
Default is the center of the image. |
|
fill (sequence or number, optional): Pixel fill value for the area outside the transformed |
|
image. If given a number, the value is used for all bands.
|
|
|
.. note:: |
|
In torchscript mode single int/float value is not supported, please use a sequence |
|
of length 1: ``[value, ]``. |
|
Returns: |
|
PIL Image or Tensor: Rotated image. |
|
|
|
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters |
|
|
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(rotate) |
|
|
|
if isinstance(interpolation, int): |
|
interpolation = _interpolation_modes_from_int(interpolation) |
|
elif not isinstance(interpolation, InterpolationMode): |
|
raise TypeError( |
|
"Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant" |
|
) |
|
|
|
if not isinstance(angle, (int, float)): |
|
raise TypeError("Argument angle should be int or float") |
|
|
|
if center is not None and not isinstance(center, (list, tuple)): |
|
raise TypeError("Argument center should be a sequence") |
|
|
|
if not isinstance(img, torch.Tensor): |
|
pil_interpolation = pil_modes_mapping[interpolation] |
|
return F_pil.rotate(img, angle=angle, interpolation=pil_interpolation, expand=expand, center=center, fill=fill) |
|
|
|
center_f = [0.0, 0.0] |
|
if center is not None: |
|
_, height, width = get_dimensions(img) |
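# ``center`` is given in absolute pixel coordinates, while the tensor kernel expects an offset
# relative to the image center, hence the shift by half the width/height below. The angle is
# negated when building the matrix because the affine helper and the tensor rotate kernel use
# opposite angle conventions.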
|
|
|
center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] |
|
|
|
|
|
|
|
matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0]) |
|
return F_t.rotate(img, matrix=matrix, interpolation=interpolation.value, expand=expand, fill=fill) |
|
|
|
|
|
def affine( |
|
img: Tensor, |
|
angle: float, |
|
translate: List[int], |
|
scale: float, |
|
shear: List[float], |
|
interpolation: InterpolationMode = InterpolationMode.NEAREST, |
|
fill: Optional[List[float]] = None, |
|
center: Optional[List[int]] = None, |
|
) -> Tensor: |
|
"""Apply affine transformation on the image keeping image center invariant. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. |
|
|
|
Args: |
|
img (PIL Image or Tensor): image to transform. |
|
angle (number): rotation angle in degrees between -180 and 180, clockwise direction. |
|
translate (sequence of integers): horizontal and vertical translations (post-rotation translation) |
|
scale (float): overall scale |
|
shear (float or sequence): shear angle value in degrees between -180 and 180, clockwise direction.
|
If a sequence is specified, the first value corresponds to a shear parallel to the x-axis, while |
|
the second value corresponds to a shear parallel to the y-axis. |
|
interpolation (InterpolationMode): Desired interpolation enum defined by |
|
:class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``. |
|
If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported. |
|
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. |
|
fill (sequence or number, optional): Pixel fill value for the area outside the transformed |
|
image. If given a number, the value is used for all bands.
|
|
|
.. note:: |
|
In torchscript mode single int/float value is not supported, please use a sequence |
|
of length 1: ``[value, ]``. |
|
center (sequence, optional): Optional center of rotation. Origin is the upper left corner. |
|
Default is the center of the image. |
|
|
|
Returns: |
|
PIL Image or Tensor: Transformed image. |
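    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.rand(3, 64, 64)
        >>> out = affine(img, angle=15.0, translate=[5, -3], scale=1.2, shear=[10.0, 0.0])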
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(affine) |
|
|
|
if isinstance(interpolation, int): |
|
interpolation = _interpolation_modes_from_int(interpolation) |
|
elif not isinstance(interpolation, InterpolationMode): |
|
raise TypeError( |
|
"Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant" |
|
) |
|
|
|
if not isinstance(angle, (int, float)): |
|
raise TypeError("Argument angle should be int or float") |
|
|
|
if not isinstance(translate, (list, tuple)): |
|
raise TypeError("Argument translate should be a sequence") |
|
|
|
if len(translate) != 2: |
|
raise ValueError("Argument translate should be a sequence of length 2") |
|
|
|
if scale <= 0.0: |
|
raise ValueError("Argument scale should be positive") |
|
|
|
if not isinstance(shear, (numbers.Number, (list, tuple))): |
|
raise TypeError("Shear should be either a single value or a sequence of two values") |
|
|
|
if isinstance(angle, int): |
|
angle = float(angle) |
|
|
|
if isinstance(translate, tuple): |
|
translate = list(translate) |
|
|
|
if isinstance(shear, numbers.Number): |
|
shear = [shear, 0.0] |
|
|
|
if isinstance(shear, tuple): |
|
shear = list(shear) |
|
|
|
if len(shear) == 1: |
|
shear = [shear[0], shear[0]] |
|
|
|
if len(shear) != 2: |
|
raise ValueError(f"Shear should be a sequence containing two values. Got {shear}") |
|
|
|
if center is not None and not isinstance(center, (list, tuple)): |
|
raise TypeError("Argument center should be a sequence") |
|
|
|
_, height, width = get_dimensions(img) |
|
if not isinstance(img, torch.Tensor): |
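# For PIL images the affine matrix is built in absolute pixel coordinates, so the rotation
# center defaults to the true image center, whereas the tensor branch below works with
# offsets relative to the image center.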
|
|
|
|
|
|
|
if center is None: |
|
center = [width * 0.5, height * 0.5] |
|
matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) |
|
pil_interpolation = pil_modes_mapping[interpolation] |
|
return F_pil.affine(img, matrix=matrix, interpolation=pil_interpolation, fill=fill) |
|
|
|
center_f = [0.0, 0.0] |
|
if center is not None: |
|
_, height, width = get_dimensions(img) |
|
|
|
center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] |
|
|
|
translate_f = [1.0 * t for t in translate] |
|
matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) |
|
return F_t.affine(img, matrix=matrix, interpolation=interpolation.value, fill=fill) |
|
|
|
|
|
|
|
|
|
|
|
@torch.jit.unused |
|
def to_grayscale(img, num_output_channels=1): |
|
"""Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. |
|
This transform does not support torch Tensor. |
|
|
|
Args: |
|
img (PIL Image): PIL Image to be converted to grayscale. |
|
num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1. |
|
|
|
Returns: |
|
PIL Image: Grayscale version of the image. |
|
|
|
- if num_output_channels = 1 : returned image is single channel |
|
- if num_output_channels = 3 : returned image is 3 channel with r = g = b |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(to_grayscale) |
|
if isinstance(img, Image.Image): |
|
return F_pil.to_grayscale(img, num_output_channels) |
|
|
|
raise TypeError("Input should be PIL Image") |
|
|
|
|
|
def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: |
|
"""Convert RGB image to grayscale version of image. |
|
If the image is torch Tensor, it is expected |
|
to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions |
|
|
|
Note: |
|
Please note that this method supports only RGB images as input. For inputs in other color spaces,
|
consider using :meth:`~torchvision.transforms.functional.to_grayscale` with PIL Image.
|
|
|
Args: |
|
img (PIL Image or Tensor): RGB Image to be converted to grayscale. |
|
num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1. |
|
|
|
Returns: |
|
PIL Image or Tensor: Grayscale version of the image. |
|
|
|
- if num_output_channels = 1 : returned image is single channel |
|
- if num_output_channels = 3 : returned image is 3 channel with r = g = b |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(rgb_to_grayscale) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.to_grayscale(img, num_output_channels) |
|
|
|
return F_t.rgb_to_grayscale(img, num_output_channels) |
|
|
|
|
|
def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor: |
|
"""Erase the input Tensor Image with given value. |
|
This transform does not support PIL Image. |
|
|
|
Args: |
|
img (Tensor Image): Tensor image of size (C, H, W) to be erased |
|
i (int): i in (i, j), i.e. the vertical coordinate of the upper left corner.
|
j (int): j in (i, j), i.e. the horizontal coordinate of the upper left corner.
|
h (int): Height of the erased region. |
|
w (int): Width of the erased region. |
|
v: Erasing value. |
|
inplace(bool, optional): For in-place operations. By default, is set False. |
|
|
|
Returns: |
|
Tensor Image: Erased image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(erase) |
|
if not isinstance(img, torch.Tensor): |
|
raise TypeError(f"img should be Tensor Image. Got {type(img)}") |
|
|
|
return F_t.erase(img, i, j, h, w, v, inplace=inplace) |
|
|
|
|
|
def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor: |
|
"""Performs Gaussian blurring on the image by given kernel |
|
|
|
The convolution will be using reflection padding corresponding to the kernel size, to maintain the input shape. |
|
If the image is torch Tensor, it is expected |
|
to have [..., H, W] shape, where ... means at most one leading dimension. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be blurred |
|
kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers |
|
like ``(kx, ky)`` or a single integer for square kernels. |
|
|
|
.. note:: |
|
In torchscript mode kernel_size as single int is not supported, use a sequence of |
|
length 1: ``[ksize, ]``. |
|
sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a |
|
sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the |
|
same sigma in both X/Y directions. If None, then it is computed using |
|
``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``. |
|
Default, None. |
|
|
|
.. note:: |
|
In torchscript mode sigma as single float is |
|
not supported, use a sequence of length 1: ``[sigma, ]``. |
|
|
|
Returns: |
|
PIL Image or Tensor: Gaussian Blurred version of the image. |
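    Example (a minimal sketch)::

        >>> import torch
        >>> img = torch.rand(3, 64, 64)
        >>> out = gaussian_blur(img, kernel_size=[5, 5], sigma=[1.5, 1.5])
        >>> out = gaussian_blur(img, kernel_size=[5, 9])   # sigma derived from the kernel size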
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(gaussian_blur) |
|
if not isinstance(kernel_size, (int, list, tuple)): |
|
raise TypeError(f"kernel_size should be int or a sequence of integers. Got {type(kernel_size)}") |
|
if isinstance(kernel_size, int): |
|
kernel_size = [kernel_size, kernel_size] |
|
if len(kernel_size) != 2: |
|
raise ValueError(f"If kernel_size is a sequence its length should be 2. Got {len(kernel_size)}") |
|
for ksize in kernel_size: |
|
if ksize % 2 == 0 or ksize < 0: |
|
raise ValueError(f"kernel_size should have odd and positive integers. Got {kernel_size}") |
|
|
|
if sigma is None: |
|
sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size] |
|
|
|
if sigma is not None and not isinstance(sigma, (int, float, list, tuple)): |
|
raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}") |
|
if isinstance(sigma, (int, float)): |
|
sigma = [float(sigma), float(sigma)] |
|
if isinstance(sigma, (list, tuple)) and len(sigma) == 1: |
|
sigma = [sigma[0], sigma[0]] |
|
if len(sigma) != 2: |
|
raise ValueError(f"If sigma is a sequence, its length should be 2. Got {len(sigma)}") |
|
for s in sigma: |
|
if s <= 0.0: |
|
raise ValueError(f"sigma should have positive values. Got {sigma}") |
|
|
|
t_img = img |
|
if not isinstance(img, torch.Tensor): |
|
if not F_pil._is_pil_image(img): |
|
raise TypeError(f"img should be PIL Image or Tensor. Got {type(img)}") |
|
|
|
t_img = pil_to_tensor(img) |
|
|
|
output = F_t.gaussian_blur(t_img, kernel_size, sigma) |
|
|
|
if not isinstance(img, torch.Tensor): |
|
output = to_pil_image(output, mode=img.mode) |
|
return output |
|
|
|
|
|
def invert(img: Tensor) -> Tensor: |
|
"""Invert the colors of an RGB/grayscale image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to have its colors inverted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, it is expected to be in mode "L" or "RGB". |
|
|
|
Returns: |
|
PIL Image or Tensor: Color inverted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(invert) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.invert(img) |
|
|
|
return F_t.invert(img) |
|
|
|
|
|
def posterize(img: Tensor, bits: int) -> Tensor: |
|
"""Posterize an image by reducing the number of bits for each color channel. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to have its colors posterized. |
|
If img is torch Tensor, it should be of type torch.uint8, and |
|
it is expected to be in [..., 1 or 3, H, W] format, where ... means |
|
it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, it is expected to be in mode "L" or "RGB". |
|
bits (int): The number of bits to keep for each channel (0-8). |
|
Returns: |
|
PIL Image or Tensor: Posterized image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(posterize) |
|
if not (0 <= bits <= 8): |
|
raise ValueError(f"The number if bits should be between 0 and 8. Got {bits}") |
|
|
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.posterize(img, bits) |
|
|
|
return F_t.posterize(img, bits) |
|
|
|
|
|
def solarize(img: Tensor, threshold: float) -> Tensor: |
|
"""Solarize an RGB/grayscale image by inverting all pixel values above a threshold. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be solarized.
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, it is expected to be in mode "L" or "RGB". |
|
threshold (float): All pixels equal or above this value are inverted. |
|
Returns: |
|
PIL Image or Tensor: Solarized image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(solarize) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.solarize(img, threshold) |
|
|
|
return F_t.solarize(img, threshold) |
|
|
|
|
|
def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor: |
|
"""Adjust the sharpness of an image. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image to be adjusted. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
sharpness_factor (float): How much to adjust the sharpness. Can be |
|
any non-negative number. 0 gives a blurred image, 1 gives the |
|
original image while 2 increases the sharpness by a factor of 2. |
|
|
|
Returns: |
|
PIL Image or Tensor: Sharpness adjusted image. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(adjust_sharpness) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.adjust_sharpness(img, sharpness_factor) |
|
|
|
return F_t.adjust_sharpness(img, sharpness_factor) |
|
|
|
|
|
def autocontrast(img: Tensor) -> Tensor: |
|
"""Maximize contrast of an image by remapping its |
|
pixels per channel so that the lowest becomes black and the lightest |
|
becomes white. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image on which autocontrast is applied. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, it is expected to be in mode "L" or "RGB". |
|
|
|
Returns: |
|
PIL Image or Tensor: An image that was autocontrasted. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(autocontrast) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.autocontrast(img) |
|
|
|
return F_t.autocontrast(img) |
|
|
|
|
|
def equalize(img: Tensor) -> Tensor: |
|
"""Equalize the histogram of an image by applying |
|
a non-linear mapping to the input in order to create a uniform |
|
distribution of grayscale values in the output. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image on which equalize is applied. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
The tensor dtype must be ``torch.uint8`` and values are expected to be in ``[0, 255]``. |
|
If img is PIL Image, it is expected to be in mode "P", "L" or "RGB". |
|
|
|
Returns: |
|
PIL Image or Tensor: An image that was equalized. |
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(equalize) |
|
if not isinstance(img, torch.Tensor): |
|
return F_pil.equalize(img) |
|
|
|
return F_t.equalize(img) |
|
|
|
|
|
def elastic_transform( |
|
img: Tensor, |
|
displacement: Tensor, |
|
interpolation: InterpolationMode = InterpolationMode.BILINEAR, |
|
fill: Optional[List[float]] = None, |
|
) -> Tensor: |
|
"""Transform a tensor image with elastic transformations. |
|
Given alpha and sigma, it will generate displacement |
|
vectors for all pixels based on random offsets. Alpha controls the strength |
|
and sigma controls the smoothness of the displacements. |
|
The displacements are added to an identity grid and the resulting grid is |
|
used to grid_sample from the image. |
|
|
|
Applications: |
|
Randomly transforms the morphology of objects in images and produces a |
|
see-through-water-like effect. |
|
|
|
Args: |
|
img (PIL Image or Tensor): Image on which elastic_transform is applied. |
|
If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format, |
|
where ... means it can have an arbitrary number of leading dimensions. |
|
If img is PIL Image, it is expected to be in mode "P", "L" or "RGB". |
|
displacement (Tensor): The displacement field. Expected shape is [1, H, W, 2]. |
|
interpolation (InterpolationMode): Desired interpolation enum defined by |
|
:class:`torchvision.transforms.InterpolationMode`. |
|
Default is ``InterpolationMode.BILINEAR``. |
|
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. |
|
fill (number or str or tuple): Pixel fill value for constant fill. Default is 0. |
|
If a tuple of length 3, it is used to fill R, G, B channels respectively. |
|
This value is only used when the padding_mode is constant. |
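    Example (a minimal sketch; a random displacement field is used here, whereas
    :class:`~torchvision.transforms.ElasticTransform` derives it from smoothed noise)::

        >>> import torch
        >>> img = torch.rand(3, 64, 64)
        >>> displacement = 0.05 * torch.randn(1, 64, 64, 2)   # [1, H, W, 2], in grid units
        >>> out = elastic_transform(img, displacement)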
|
""" |
|
if not torch.jit.is_scripting() and not torch.jit.is_tracing(): |
|
_log_api_usage_once(elastic_transform) |
|
|
|
if isinstance(interpolation, int): |
|
warnings.warn( |
|
"Argument interpolation should be of type InterpolationMode instead of int. " |
|
"Please, use InterpolationMode enum." |
|
) |
|
interpolation = _interpolation_modes_from_int(interpolation) |
|
|
|
if not isinstance(displacement, torch.Tensor): |
|
raise TypeError("Argument displacement should be a Tensor") |
|
|
|
t_img = img |
|
if not isinstance(img, torch.Tensor): |
|
if not F_pil._is_pil_image(img): |
|
raise TypeError(f"img should be PIL Image or Tensor. Got {type(img)}") |
|
t_img = pil_to_tensor(img) |
|
|
|
shape = t_img.shape |
|
shape = (1,) + shape[-2:] + (2,) |
|
if shape != displacement.shape: |
|
raise ValueError(f"Argument displacement shape should be {shape}, but given {displacement.shape}") |
|
|
|
|
|
|
|
|
|
|
|
output = F_t.elastic_transform( |
|
t_img, |
|
displacement, |
|
interpolation=interpolation.value, |
|
fill=fill, |
|
) |
|
|
|
if not isinstance(img, torch.Tensor): |
|
output = to_pil_image(output, mode=img.mode) |
|
return output |
|
|