Shortcuts

Source code for torchvision.datapoints._bounding_box

from __future__ import annotations

from enum import Enum
from typing import Any, List, Optional, Sequence, Tuple, Union

import torch
from torchvision.transforms import InterpolationMode  # TODO: this needs to be moved out of transforms

from ._datapoint import _FillTypeJIT, Datapoint


[docs]class BoundingBoxFormat(Enum): """[BETA] Coordinate format of a bounding box. Available formats are * ``XYXY`` * ``XYWH`` * ``CXCYWH`` """ XYXY = "XYXY" XYWH = "XYWH" CXCYWH = "CXCYWH"
[docs]class BoundingBox(Datapoint): """[BETA] :class:`torch.Tensor` subclass for bounding boxes. Args: data: Any data that can be turned into a tensor with :func:`torch.as_tensor`. format (BoundingBoxFormat, str): Format of the bounding box. spatial_size (two-tuple of ints): Height and width of the corresponding image or video. dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from ``data``. device (torch.device, optional): Desired device of the bounding box. If omitted and ``data`` is a :class:`torch.Tensor`, the device is taken from it. Otherwise, the bounding box is constructed on the CPU. requires_grad (bool, optional): Whether autograd should record operations on the bounding box. If omitted and ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``. """ format: BoundingBoxFormat spatial_size: Tuple[int, int] @classmethod def _wrap(cls, tensor: torch.Tensor, *, format: BoundingBoxFormat, spatial_size: Tuple[int, int]) -> BoundingBox: bounding_box = tensor.as_subclass(cls) bounding_box.format = format bounding_box.spatial_size = spatial_size return bounding_box def __new__( cls, data: Any, *, format: Union[BoundingBoxFormat, str], spatial_size: Tuple[int, int], dtype: Optional[torch.dtype] = None, device: Optional[Union[torch.device, str, int]] = None, requires_grad: Optional[bool] = None, ) -> BoundingBox: tensor = cls._to_tensor(data, dtype=dtype, device=device, requires_grad=requires_grad) if isinstance(format, str): format = BoundingBoxFormat[format.upper()] return cls._wrap(tensor, format=format, spatial_size=spatial_size)
[docs] @classmethod def wrap_like( cls, other: BoundingBox, tensor: torch.Tensor, *, format: Optional[BoundingBoxFormat] = None, spatial_size: Optional[Tuple[int, int]] = None, ) -> BoundingBox: """Wrap a :class:`torch.Tensor` as :class:`BoundingBox` from a reference. Args: other (BoundingBox): Reference bounding box. tensor (Tensor): Tensor to be wrapped as :class:`BoundingBox` format (BoundingBoxFormat, str, optional): Format of the bounding box. If omitted, it is taken from the reference. spatial_size (two-tuple of ints, optional): Height and width of the corresponding image or video. If omitted, it is taken from the reference. """ if isinstance(format, str): format = BoundingBoxFormat[format.upper()] return cls._wrap( tensor, format=format if format is not None else other.format, spatial_size=spatial_size if spatial_size is not None else other.spatial_size, )
def __repr__(self, *, tensor_contents: Any = None) -> str: # type: ignore[override] return self._make_repr(format=self.format, spatial_size=self.spatial_size) def horizontal_flip(self) -> BoundingBox: output = self._F.horizontal_flip_bounding_box( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size ) return BoundingBox.wrap_like(self, output) def vertical_flip(self) -> BoundingBox: output = self._F.vertical_flip_bounding_box( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size ) return BoundingBox.wrap_like(self, output) def resize( # type: ignore[override] self, size: List[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[Union[str, bool]] = "warn", ) -> BoundingBox: output, spatial_size = self._F.resize_bounding_box( self.as_subclass(torch.Tensor), spatial_size=self.spatial_size, size=size, max_size=max_size, ) return BoundingBox.wrap_like(self, output, spatial_size=spatial_size) def crop(self, top: int, left: int, height: int, width: int) -> BoundingBox: output, spatial_size = self._F.crop_bounding_box( self.as_subclass(torch.Tensor), self.format, top=top, left=left, height=height, width=width ) return BoundingBox.wrap_like(self, output, spatial_size=spatial_size) def center_crop(self, output_size: List[int]) -> BoundingBox: output, spatial_size = self._F.center_crop_bounding_box( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, output_size=output_size ) return BoundingBox.wrap_like(self, output, spatial_size=spatial_size) def resized_crop( self, top: int, left: int, height: int, width: int, size: List[int], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, antialias: Optional[Union[str, bool]] = "warn", ) -> BoundingBox: output, spatial_size = self._F.resized_crop_bounding_box( self.as_subclass(torch.Tensor), self.format, top, left, height, width, size=size ) return BoundingBox.wrap_like(self, output, spatial_size=spatial_size) def pad( self, padding: Union[int, Sequence[int]], fill: Optional[Union[int, float, List[float]]] = None, padding_mode: str = "constant", ) -> BoundingBox: output, spatial_size = self._F.pad_bounding_box( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, padding=padding, padding_mode=padding_mode, ) return BoundingBox.wrap_like(self, output, spatial_size=spatial_size) def rotate( self, angle: float, interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, expand: bool = False, center: Optional[List[float]] = None, fill: _FillTypeJIT = None, ) -> BoundingBox: output, spatial_size = self._F.rotate_bounding_box( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, angle=angle, expand=expand, center=center, ) return BoundingBox.wrap_like(self, output, spatial_size=spatial_size) def affine( self, angle: Union[int, float], translate: List[float], scale: float, shear: List[float], interpolation: Union[InterpolationMode, int] = InterpolationMode.NEAREST, fill: _FillTypeJIT = None, center: Optional[List[float]] = None, ) -> BoundingBox: output = self._F.affine_bounding_box( self.as_subclass(torch.Tensor), self.format, self.spatial_size, angle, translate=translate, scale=scale, shear=shear, center=center, ) return BoundingBox.wrap_like(self, output) def perspective( self, startpoints: Optional[List[List[int]]], endpoints: Optional[List[List[int]]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, coefficients: Optional[List[float]] = None, ) -> BoundingBox: output = self._F.perspective_bounding_box( self.as_subclass(torch.Tensor), format=self.format, spatial_size=self.spatial_size, startpoints=startpoints, endpoints=endpoints, coefficients=coefficients, ) return BoundingBox.wrap_like(self, output) def elastic( self, displacement: torch.Tensor, interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, fill: _FillTypeJIT = None, ) -> BoundingBox: output = self._F.elastic_bounding_box( self.as_subclass(torch.Tensor), self.format, self.spatial_size, displacement=displacement ) return BoundingBox.wrap_like(self, output)

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources