def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
    """
    Performs non-maximum suppression (NMS) on the boxes according
    to their intersection-over-union (IoU).

    NMS iteratively removes lower scoring boxes which have an
    IoU greater than ``iou_threshold`` with another (higher scoring)
    box.

    If multiple boxes have the exact same score and satisfy the IoU
    criterion with respect to a reference box, the selected box is
    not guaranteed to be the same between CPU and GPU. This is similar
    to the behavior of argsort in PyTorch when repeated values are present.

    Args:
        boxes (Tensor[N, 4]): boxes to perform NMS on. They
            are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and
            ``0 <= y1 < y2``.
        scores (Tensor[N]): scores for each one of the boxes
        iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold

    Returns:
        Tensor: int64 tensor with the indices of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(nms)
    # The actual suppression is done by the registered torchvision operator,
    # so make sure the operator library is available before dispatching to it.
    _assert_has_ops()
    kept_indices = torch.ops.torchvision.nms(boxes, scores, iou_threshold)
    return kept_indices
def batched_nms(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    """
    Performs non-maximum suppression in a batched fashion.

    Each index value corresponds to a category, and NMS
    will not be applied between elements of different categories.

    Args:
        boxes (Tensor[N, 4]): boxes where NMS will be performed. They
            are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and
            ``0 <= y1 < y2``.
        scores (Tensor[N]): scores for each one of the boxes
        idxs (Tensor[N]): indices of the categories for each one of the boxes.
        iou_threshold (float): discards all overlapping boxes with IoU > iou_threshold

    Returns:
        Tensor: int64 tensor with the indices of the elements that have been kept by NMS, sorted
        in decreasing order of scores
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(batched_nms)
    # Benchmarks that drove the following thresholds are at
    # https://github.com/pytorch/vision/issues/1311#issuecomment-781329339
    numel_limit = 4000 if boxes.device.type == "cpu" else 20000
    if boxes.numel() > numel_limit and not torchvision._is_tracing():
        # Large inputs (outside of tracing): run NMS once per category.
        return _batched_nms_vanilla(boxes, scores, idxs, iou_threshold)
    # Small inputs, or tracing: single NMS call on per-category shifted boxes.
    return _batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold)
@torch.jit._script_if_tracing
def _batched_nms_coordinate_trick(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    """Batched NMS done with one ``nms`` call on category-shifted boxes.

    Returns the int64 indices kept by ``nms``; an empty int64 tensor is
    returned right away when ``boxes`` has no elements.
    """
    # strategy: in order to perform NMS independently per class,
    # we add an offset to all the boxes. The offset is dependent
    # only on the class idx, and is large enough so that boxes
    # from different classes do not overlap
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)

    # One step larger than the biggest coordinate, in the dtype/device of ``boxes``.
    per_class_stride = boxes.max() + torch.tensor(1).to(boxes)
    class_offsets = idxs.to(boxes) * per_class_stride
    shifted_boxes = boxes + class_offsets[:, None]
    return nms(shifted_boxes, scores, iou_threshold)


@torch.jit._script_if_tracing
def _batched_nms_vanilla(
    boxes: Tensor,
    scores: Tensor,
    idxs: Tensor,
    iou_threshold: float,
) -> Tensor:
    """Batched NMS done by calling ``nms`` once per distinct value in ``idxs``.

    Returns the kept indices ordered by decreasing score.
    """
    # Based on Detectron2 implementation, just manually call nms() on each class independently
    selected = torch.zeros_like(scores, dtype=torch.bool)
    for category in torch.unique(idxs):
        member_positions = torch.where(idxs == category)[0]
        survivors = nms(boxes[member_positions], scores[member_positions], iou_threshold)
        # ``survivors`` indexes into the per-category subset; map back to global positions.
        selected[member_positions[survivors]] = True

    kept = torch.where(selected)[0]
    _, score_order = scores[kept].sort(descending=True)
    return kept[score_order]
def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor:
    """
    Remove every box from ``boxes`` which contains at least one side length
    that is smaller than ``min_size``.

    .. note::
        For sanitizing a :class:`~torchvision.tv_tensors.BoundingBoxes` object, consider using
        the transform :func:`~torchvision.transforms.v2.SanitizeBoundingBoxes` instead.

    Args:
        boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format
            with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        min_size (float): minimum size

    Returns:
        Tensor[K]: indices of the boxes that have both sides
        larger than or equal to ``min_size``
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(remove_small_boxes)
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    # A box survives only when both of its sides reach the minimum.
    large_enough = (widths >= min_size) & (heights >= min_size)
    return torch.where(large_enough)[0]
def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor:
    """
    Clip boxes so that they lie inside an image of size ``size``.

    .. note::
        For clipping a :class:`~torchvision.tv_tensors.BoundingBoxes` object, consider using
        the transform :func:`~torchvision.transforms.v2.ClampBoundingBoxes` instead.

    Args:
        boxes (Tensor[N, 4]): boxes in ``(x1, y1, x2, y2)`` format
            with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        size (Tuple[height, width]): size of the image

    Returns:
        Tensor[N, 4]: clipped boxes
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(clip_boxes_to_image)
    ndim = boxes.dim()
    # Even positions of the last axis hold x values, odd positions hold y values.
    xs = boxes[..., 0::2]
    ys = boxes[..., 1::2]
    height, width = size

    if torchvision._is_tracing():
        # NOTE(review): presumably max/min against tensors is used here so the bounds
        # stay tensor-valued in the traced graph -- confirm.
        xs = torch.max(xs, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        xs = torch.min(xs, torch.tensor(width, dtype=boxes.dtype, device=boxes.device))
        ys = torch.max(ys, torch.tensor(0, dtype=boxes.dtype, device=boxes.device))
        ys = torch.min(ys, torch.tensor(height, dtype=boxes.dtype, device=boxes.device))
    else:
        xs = xs.clamp(min=0, max=width)
        ys = ys.clamp(min=0, max=height)

    # Stacking on a new trailing axis and reshaping re-interleaves x and y
    # back into the original coordinate order.
    return torch.stack((xs, ys), dim=ndim).reshape(boxes.shape)
def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor:
    """
    Converts :class:`torch.Tensor` boxes from a given ``in_fmt`` to ``out_fmt``.

    .. note::
        For converting a :class:`torch.Tensor` or a :class:`~torchvision.tv_tensors.BoundingBoxes` object
        between different formats,
        consider using :func:`~torchvision.transforms.v2.functional.convert_bounding_box_format` instead.
        Or see the corresponding transform :func:`~torchvision.transforms.v2.ConvertBoundingBoxFormat`.

    Supported ``in_fmt`` and ``out_fmt`` strings are:

    ``'xyxy'``: boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right.
    This is the format that torchvision utilities expect.

    ``'xywh'``: boxes are represented via corner, width and height, x1, y1 being top left, w, h being width and height.

    ``'cxcywh'``: boxes are represented via centre, width and height, cx, cy being center of box, w, h
    being width and height.

    Args:
        boxes (Tensor[N, 4]): boxes which will be converted.
        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'].
        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']

    Returns:
        Tensor[N, 4]: Boxes into converted format.

    Raises:
        ValueError: if ``in_fmt`` or ``out_fmt`` is not one of the supported formats.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(box_convert)
    allowed_fmts = ("xyxy", "xywh", "cxcywh")
    if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts:
        raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt")

    # Same format on both sides: hand back a copy, never the input itself.
    if in_fmt == out_fmt:
        return boxes.clone()

    # Step 1: bring the input to xyxy (no-op when it already is xyxy).
    if in_fmt == "xywh":
        boxes = _box_xywh_to_xyxy(boxes)
    elif in_fmt == "cxcywh":
        boxes = _box_cxcywh_to_xyxy(boxes)

    # Step 2: go from xyxy to the requested format (no-op when xyxy is requested).
    if out_fmt == "xywh":
        boxes = _box_xyxy_to_xywh(boxes)
    elif out_fmt == "cxcywh":
        boxes = _box_xyxy_to_cxcywh(boxes)

    return boxes
def box_area(boxes: Tensor) -> Tensor:
    """
    Computes the area of a set of bounding boxes, which are specified by their
    (x1, y1, x2, y2) coordinates.

    Args:
        boxes (Tensor[N, 4]): boxes for which the area will be computed. They
            are expected to be in (x1, y1, x2, y2) format with
            ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Returns:
        Tensor[N]: the area for each box
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(box_area)
    # NOTE(review): ``_upcast`` is defined elsewhere; presumably it widens the dtype
    # so the multiplication below cannot overflow -- confirm.
    upcasted = _upcast(boxes)
    widths = upcasted[:, 2] - upcasted[:, 0]
    heights = upcasted[:, 3] - upcasted[:, 1]
    return widths * heights
# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
# with slight modifications
def _box_inter_union(boxes1: Tensor, boxes2: Tensor) -> Tuple[Tensor, Tensor]:
    """Return the pairwise intersection and union areas of two box sets.

    Both outputs have one row per box in ``boxes1`` and one column per box
    in ``boxes2``.
    """
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)

    # Corners of the overlap rectangle for every (boxes1[i], boxes2[j]) pair.
    overlap_top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    overlap_bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Negative extents mean the pair does not overlap; clamp them to zero.
    overlap_wh = _upcast(overlap_bottom_right - overlap_top_left).clamp(min=0)  # [N,M,2]
    inter = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]  # [N,M]

    union = area1[:, None] + area2 - inter

    return inter, union
def box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """
    Return intersection-over-union (Jaccard index) between two sets of boxes.

    Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        boxes1 (Tensor[N, 4]): first set of boxes
        boxes2 (Tensor[M, 4]): second set of boxes

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(box_iou)
    intersection, union = _box_inter_union(boxes1, boxes2)
    return intersection / union
# Implementation adapted from https://github.com/facebookresearch/detr/blob/master/util/box_ops.py
def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor:
    """
    Return generalized intersection-over-union (Jaccard index) between two sets of boxes.

    Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        boxes1 (Tensor[N, 4]): first set of boxes
        boxes2 (Tensor[M, 4]): second set of boxes

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values
        for every element in boxes1 and boxes2
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(generalized_box_iou)

    intersection, union = _box_inter_union(boxes1, boxes2)
    iou = intersection / union

    # Smallest box enclosing each (boxes1[i], boxes2[j]) pair.
    hull_top_left = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    hull_bottom_right = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    hull_wh = _upcast(hull_bottom_right - hull_top_left).clamp(min=0)  # [N,M,2]
    hull_area = hull_wh[:, :, 0] * hull_wh[:, :, 1]

    # Penalise IoU by the share of the enclosing box not covered by the union.
    return iou - (hull_area - union) / hull_area
def complete_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor:
    """
    Return complete intersection-over-union (Jaccard index) between two sets of boxes.

    Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        boxes1 (Tensor[N, 4]): first set of boxes
        boxes2 (Tensor[M, 4]): second set of boxes
        eps (float, optional): small number to prevent division by zero. Default: 1e-7

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values
        for every element in boxes1 and boxes2
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(complete_box_iou)

    boxes1 = _upcast(boxes1)
    boxes2 = _upcast(boxes2)

    diou, iou = _box_diou_iou(boxes1, boxes2, eps)

    # Side lengths; the boxes1 terms get a trailing axis so they pair up with every box in boxes2.
    w_pred = (boxes1[:, 2] - boxes1[:, 0])[:, None]
    h_pred = (boxes1[:, 3] - boxes1[:, 1])[:, None]
    w_gt = boxes2[:, 2] - boxes2[:, 0]
    h_gt = boxes2[:, 3] - boxes2[:, 1]

    # Aspect-ratio term: squared difference of the arctangents of width / height.
    angle_gap = torch.atan(w_pred / h_pred) - torch.atan(w_gt / h_gt)
    v = (4 / (torch.pi**2)) * torch.pow(angle_gap, 2)

    # The weighting factor is computed with gradient tracking switched off.
    with torch.no_grad():
        alpha = v / (1 - iou + v + eps)

    return diou - alpha * v
def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor:
    """
    Return distance intersection-over-union (Jaccard index) between two sets of boxes.

    Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 < x2`` and ``0 <= y1 < y2``.

    Args:
        boxes1 (Tensor[N, 4]): first set of boxes
        boxes2 (Tensor[M, 4]): second set of boxes
        eps (float, optional): small number to prevent division by zero. Default: 1e-7

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values
        for every element in boxes1 and boxes2
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(distance_box_iou)

    upcast_boxes1 = _upcast(boxes1)
    upcast_boxes2 = _upcast(boxes2)
    # The helper also returns the plain IoU, which is not needed here.
    return _box_diou_iou(upcast_boxes1, upcast_boxes2, eps=eps)[0]
def _box_diou_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tuple[Tensor, Tensor]:
    """Return the pairwise ``(distance IoU, IoU)`` matrices of two box sets.

    ``eps`` is added to the squared enclosing-box diagonal, which is the
    denominator of the distance penalty.
    """
    iou = box_iou(boxes1, boxes2)

    # Smallest box enclosing each (boxes1[i], boxes2[j]) pair.
    hull_top_left = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    hull_bottom_right = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
    hull_wh = _upcast(hull_bottom_right - hull_top_left).clamp(min=0)  # [N,M,2]
    diagonal_distance_squared = (hull_wh[:, :, 0] ** 2) + (hull_wh[:, :, 1] ** 2) + eps

    # centers of boxes
    center_x1 = (boxes1[:, 0] + boxes1[:, 2]) / 2
    center_y1 = (boxes1[:, 1] + boxes1[:, 3]) / 2
    center_x2 = (boxes2[:, 0] + boxes2[:, 2]) / 2
    center_y2 = (boxes2[:, 1] + boxes2[:, 3]) / 2

    # The distance between boxes' centers squared.
    delta_x = _upcast((center_x1[:, None] - center_x2[None, :]))
    delta_y = _upcast((center_y1[:, None] - center_y2[None, :]))
    centers_distance_squared = (delta_x**2) + (delta_y**2)

    # The distance IoU is the IoU penalized by a normalized
    # distance between boxes' centers squared.
    return iou - (centers_distance_squared / diagonal_distance_squared), iou
def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor:
    """
    Compute the bounding boxes around the provided masks.

    Returns a [N, 4] tensor containing bounding boxes. The boxes are in ``(x1, y1, x2, y2)`` format with
    ``0 <= x1 <= x2`` and ``0 <= y1 <= y2``.

    .. warning::

        In most cases the output will guarantee ``x1 < x2`` and ``y1 < y2``. But
        if the input is degenerate, e.g. if a mask is a single row or a single
        column, then the output may have x1 = x2 or y1 = y2.

    Args:
        masks (Tensor[N, H, W]): masks to transform where N is the number of masks
            and (H, W) are the spatial dimensions.

    Returns:
        Tensor[N, 4]: bounding boxes
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(masks_to_boxes)

    # No elements at all: nothing to box.
    if masks.numel() == 0:
        return torch.zeros((0, 4), device=masks.device, dtype=torch.float)

    num_masks = masks.shape[0]
    bounding_boxes = torch.zeros((num_masks, 4), device=masks.device, dtype=torch.float)

    for index, mask in enumerate(masks):
        # Coordinates of every non-zero pixel; the first output indexes rows (y),
        # the second indexes columns (x).
        # NOTE(review): a mask with no non-zero pixel leaves these empty, and the
        # reductions below presumably raise in that case -- confirm.
        rows, cols = torch.where(mask != 0)

        bounding_boxes[index, 0] = torch.min(cols)
        bounding_boxes[index, 1] = torch.min(rows)
        bounding_boxes[index, 2] = torch.max(cols)
        bounding_boxes[index, 3] = torch.max(rows)

    return bounding_boxes
Docs
Access comprehensive developer documentation for PyTorch
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: Cookies Policy.