Source code for torchvideo.transforms.transforms.random_resized_crop_video

from typing import Tuple, Union

import PIL
from PIL.Image import Image
from torchvision.transforms import transforms as tv, functional as F

from .transform import FramesAndParams, Transform
from .types import PILVideo, PILVideoI
from .internal import canonicalize_size, to_iter, peek_iter


class RandomResizedCropVideo(Transform[PILVideo, PILVideoI, Tuple[int, int, int, int]]):
    """Crop the given video (composed of PIL Images) to random size and aspect ratio.

    A crop of random scale (default: :math:`[0.08, 1.0]`) of the original size and a
    random scale (default: :math:`[3/4, 4/3]`) of the original aspect ratio is
    made. This crop is finally resized to given size. This is popularly used to train
    the Inception networks.

    A single set of crop parameters is drawn per video (from its first frame) and
    the same crop is then applied to every frame.

    Args:
        size: Desired output size. If size is an int instead of sequence like
            ``(h, w)``, a square image ``(size, size)`` is made.
        scale: range of size of the origin size cropped.
        ratio: range of aspect ratio of the origin aspect ratio cropped.
        interpolation: Default: :py:const:`PIL.Image.BILINEAR` (see
            :py:meth:`PIL.Image.Image.resize` for other options).
    """

    def __init__(
        self,
        size: Union[Tuple[int, int], int],
        scale: Tuple[float, float] = (0.08, 1.0),
        ratio: Tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0),
        interpolation: int = PIL.Image.BILINEAR,
    ) -> None:
        # ``canonicalize_size`` is a project helper; presumably it expands an int
        # into a ``(size, size)`` pair as the class docstring describes -- verify
        # against its definition.
        self.size = canonicalize_size(size)
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    def __repr__(self) -> str:
        """Return ``ClassName(size=..., interpolation=..., scale=..., ratio=...)``."""
        return self.__class__.__name__ + (
            "(size={size}, interpolation={interpolation}, "
            "scale={scale}, ratio={ratio})"
        ).format(
            size=self.size,
            interpolation=self.interpolation,
            scale=self.scale,
            ratio=self.ratio,
        )

    def _gen_params(
        self, frames: PILVideo
    ) -> FramesAndParams[PILVideo, Tuple[int, int, int, int]]:
        """Sample one set of crop parameters for the whole video.

        Args:
            frames: The video's frames.

        Returns:
            The frames (as handed back by ``peek_iter``) bundled with the
            ``(i, j, h, w)`` crop parameters sampled from the first frame.
        """
        # Only the first frame is inspected to sample the crop box.
        # NOTE(review): ``peek_iter`` presumably hands back an iterable that still
        # includes the peeked frame -- confirm against its definition.
        frame, frames = peek_iter(to_iter(frames))
        params = tv.RandomResizedCrop.get_params(frame, self.scale, self.ratio)
        return FramesAndParams(frames=frames, params=params)

    def _transform(
        self, frames: PILVideo, params: Tuple[int, int, int, int]
    ) -> PILVideoI:
        """Lazily yield each frame cropped with ``params`` and resized.

        Args:
            frames: The video's frames.
            params: ``(i, j, h, w)`` crop parameters shared by all frames.
        """
        i, j, h, w = params
        for frame in frames:
            yield self._transform_frame(frame, i, j, h, w)

    def _transform_frame(self, frame: Image, i: int, j: int, h: int, w: int) -> Image:
        """Crop ``frame`` with ``(i, j, h, w)`` and resize it to ``self.size``."""
        return F.resized_crop(frame, i, j, h, w, self.size, self.interpolation)