Source code for torchvideo.transforms.transforms.random_resized_crop_video

from typing import Tuple, Union

import PIL
from PIL.Image import Image
from torchvision.transforms import transforms as tv, functional as F

from .transform import FramesAndParams, Transform
from .types import PILVideo, PILVideoI
from .internal import canonicalize_size, to_iter, peek_iter


class RandomResizedCropVideo(Transform[PILVideo, PILVideoI, Tuple[int, int, int, int]]):
    """Crop the given video (composed of PIL Images) to random size and aspect ratio.

    A crop of random scale (default: :math:`[0.08, 1.0]`) of the original size
    and a random scale (default: :math:`[3/4, 4/3]`) of the original aspect
    ratio is made. This crop is finally resized to given size. This is
    popularly used to train the Inception networks.

    One set of crop parameters is sampled per video (from its first frame) and
    the same crop is then applied to every frame.

    Args:
        size: Desired output size. If size is an int instead of sequence like
            ``(h, w)``, a square image ``(size, size)`` is made.
        scale: range of size of the origin size cropped.
        ratio: range of aspect ratio of the origin aspect ratio cropped.
        interpolation: Default: :py:const:`PIL.Image.BILINEAR` (see
            :py:meth:`PIL.Image.Image.resize` for other options).
    """

    def __init__(
        self,
        size: Union[Tuple[int, int], int],
        scale: Tuple[float, float] = (0.08, 1.0),
        ratio: Tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0),
        interpolation=PIL.Image.BILINEAR,
    ):
        # NOTE(review): canonicalize_size presumably expands an int into a
        # (size, size) pair, as the class docstring describes -- confirm
        # against .internal.
        self.size = canonicalize_size(size)
        self.interpolation = interpolation
        self.scale = scale
        self.ratio = ratio

    def __repr__(self):
        """Return ``ClassName(size=..., interpolation=..., scale=..., ratio=...)``."""
        # The closing parenthesis was previously missing, which produced an
        # unbalanced representation such as "RandomResizedCropVideo(size=...".
        return self.__class__.__name__ + (
            "(size={size}, interpolation={interpolation}, "
            "scale={scale}, ratio={ratio})"
        ).format(
            size=self.size,
            interpolation=self.interpolation,
            scale=self.scale,
            ratio=self.ratio,
        )

    def _gen_params(
        self, frames: PILVideo
    ) -> FramesAndParams[PILVideo, Tuple[int, int, int, int]]:
        """Sample a single crop box for the whole video from its first frame.

        Args:
            frames: The video, as PIL frames.

        Returns:
            The frames to transform paired with the crop box returned by
            :py:meth:`torchvision.transforms.RandomResizedCrop.get_params`.
        """
        # Look at the first frame without losing it: ``peek_iter`` hands back
        # the peeked frame together with the iterable to use from here on
        # (presumably still including that first frame -- see .internal).
        first_frame, frames = peek_iter(to_iter(frames))
        params = tv.RandomResizedCrop.get_params(first_frame, self.scale, self.ratio)
        return FramesAndParams(frames=frames, params=params)

    def _transform(
        self, frames: PILVideo, params: Tuple[int, int, int, int]
    ) -> PILVideoI:
        """Lazily apply the same crop-and-resize to every frame.

        Args:
            frames: The frames to transform.
            params: Crop box ``(i, j, h, w)`` as produced by ``_gen_params``.

        Yields:
            Each frame cropped to the box and resized to ``self.size``.
        """
        i, j, h, w = params
        for frame in frames:
            yield self._transform_frame(frame, i, j, h, w)

    def _transform_frame(self, frame: Image, i: int, j: int, h: int, w: int) -> Image:
        """Crop ``frame`` to the box ``(i, j, h, w)`` and resize it to ``self.size``.

        The box values are forwarded unchanged to
        :py:func:`torchvision.transforms.functional.resized_crop` (in
        torchvision's convention: top, left, height, width).
        """
        return F.resized_crop(frame, i, j, h, w, self.size, self.interpolation)