# Source code for torchvideo.transforms.transforms.random_crop_video

from typing import Tuple, Union, Optional

from PIL.Image import Image
from torchvision.transforms import transforms as tv, functional as F

from .types import PILVideo, PILVideoI
from .internal import canonicalize_size, to_iter, peek_iter
from .transform import Transform


class RandomCropVideo(Transform[PILVideo, PILVideoI, Tuple[int, int, int, int]]):
    """Crop the given Video (composed of PIL Images) at a random location.

    The crop location is drawn once per video (from the first frame, after any
    padding has been applied) and the same crop is then applied to every frame.

    Args:
        size: Desired output size of the crop. If ``size`` is an
            int instead of sequence like ``(h, w)``, a square crop ``(size, size)`` is
            made.
        padding: Optional padding on each border
            of the image. Default is ``None``, i.e. no padding. If a sequence of
            length 4 is provided, it is used to pad left, top, right, bottom borders
            respectively. If a sequence of length 2 is provided, it is used to
            pad left/right, top/bottom borders, respectively.
        pad_if_needed: Whether to pad the image if smaller than the
            desired size to avoid raising an exception.
        fill: Pixel fill value for constant fill. If a tuple of
            length 3, it is used to fill R, G, B channels respectively.
            This value is only used when the ``padding_mode`` is ``'constant'``.
        padding_mode: Type of padding. Should be one of: ``'constant'``, ``'edge'``,
            ``'reflect'`` or ``'symmetric'``.

             - ``'constant'``: pads with a constant value, this value is specified with
               fill.
             - ``'edge'``: pads with the last value on the edge of the image.
             - ``'reflect'``: pads with reflection of image (without repeating the last
               value on the edge) padding ``[1, 2, 3, 4]`` with 2 elements on both sides
               in reflect mode will result in ``[3, 2, 1, 2, 3, 4, 3, 2]``.
             - ``'symmetric'``: pads with reflection of image (repeating the last value
               on the edge) padding ``[1, 2, 3, 4]`` with 2 elements on both sides in
               symmetric mode will result in ``[2, 1, 1, 2, 3, 4, 4, 3]``.

    """

    def __init__(
        self,
        size: Union[Tuple[int, int], int],
        padding: Optional[Union[Tuple[int, int, int, int], Tuple[int, int]]] = None,
        pad_if_needed: bool = False,
        fill: Union[int, Tuple[int, int, int]] = 0,
        padding_mode: str = "constant",
    ):
        super().__init__()
        # ``canonicalize_size`` normalises ``size``; the result is indexed below as
        # ``[0]`` -> height and ``[1]`` -> width.
        self.size = canonicalize_size(size)
        self.padding = padding
        self.pad_if_needed = pad_if_needed
        self.fill = fill
        self.padding_mode = padding_mode

    def _gen_params(self, frames: PILVideo) -> Tuple[int, int, int, int]:
        """Draw the crop parameters for a video from its first (padded) frame.

        Args:
            frames: The video whose first frame determines the crop location.

        Returns:
            The value of ``torchvision``'s ``RandomCrop.get_params`` for the padded
            first frame and ``self.size``.
        """
        # NOTE(review): only the first frame is needed here and the re-assembled
        # iterator returned by ``peek_iter`` is discarded. If ``frames`` is a
        # one-shot iterator this may advance it -- confirm how the base
        # ``Transform`` passes ``frames`` on to ``_transform``.
        first_frame, _ = peek_iter(to_iter(frames))
        first_frame = self._maybe_pad(first_frame)
        return tv.RandomCrop.get_params(first_frame, self.size)

    def _transform(
        self, frames: PILVideo, params: Tuple[int, int, int, int]
    ) -> PILVideoI:
        """Lazily pad (if configured) and crop each frame with the same ``params``.

        Args:
            frames: Iterable of PIL frames.
            params: Crop parameters as returned by :meth:`_gen_params`; they are
                unpacked positionally into ``F.crop``.

        Yields:
            Each frame after padding and cropping.
        """
        for frame in frames:
            yield F.crop(self._maybe_pad(frame), *params)

    def _maybe_pad(self, frame: Image) -> Image:
        """Apply the configured padding to a single frame.

        The explicit ``padding`` (if any) is applied first. Then, when
        ``pad_if_needed`` is set, the frame is padded horizontally and/or
        vertically if it is still smaller than the requested crop size.

        Args:
            frame: A single PIL frame.

        Returns:
            The padded frame, or ``frame`` itself when no padding applies.
        """
        if self.padding is not None:
            frame = F.pad(frame, self.padding, self.fill, self.padding_mode)
        if not self.pad_if_needed:
            return frame

        # PIL reports ``size`` as (width, height) whereas ``self.size`` is indexed
        # as (height, width), hence the crossed indices below.
        # A 2-element padding is applied to both opposite borders (see the
        # ``padding`` argument docs), so the deficit is added on each side; this
        # is kept as-is to preserve the existing behaviour.
        width_deficit = self.size[1] - frame.size[0]
        if width_deficit > 0:
            frame = F.pad(frame, (width_deficit, 0), self.fill, self.padding_mode)

        height_deficit = self.size[0] - frame.size[1]
        if height_deficit > 0:
            frame = F.pad(frame, (0, height_deficit), self.fill, self.padding_mode)
        return frame

    def __repr__(self) -> str:
        """Return a constructor-like description of this transform."""
        template = (
            "{name}(size={size!r}, padding={padding!r}, "
            "pad_if_needed={pad_if_needed!r}, "
            "fill={fill!r}, padding_mode={padding_mode!r})"
        )
        return template.format(
            name=self.__class__.__name__,
            size=tuple(self.size),
            padding=self.padding,
            pad_if_needed=self.pad_if_needed,
            fill=self.fill,
            padding_mode=self.padding_mode,
        )