Source code for torchvision.datasets._stereo_matching
import functools
import json
import os
import random
import shutil
from abc import ABC, abstractmethod
from glob import glob
from pathlib import Path
from typing import Callable, cast, List, Optional, Tuple, Union

import numpy as np
from PIL import Image

from .utils import _read_pfm, download_and_extract_archive, verify_str_arg
from .vision import VisionDataset

T1 = Tuple[Image.Image, Image.Image, Optional[np.ndarray], np.ndarray]
T2 = Tuple[Image.Image, Image.Image, Optional[np.ndarray]]

__all__ = ()

_read_pfm_file = functools.partial(_read_pfm, slice_channels=1)


class StereoMatchingDataset(ABC, VisionDataset):
    """Base interface for Stereo matching datasets"""

    # Subclasses set this to True to always return a 4-tuple from ``__getitem__``.
    _has_built_in_disparity_mask = False

    def __init__(self, root: Union[str, Path], transforms: Optional[Callable] = None) -> None:
        """
        Args:
            root(str): Root directory of the dataset.
            transforms(callable, optional): A function/transform that takes in Tuples of
                (images, disparities, valid_masks) and returns a transformed version of each of them.
                images is a Tuple of (``PIL.Image``, ``PIL.Image``)
                disparities is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (1, H, W)
                valid_masks is a Tuple of (``np.ndarray``, ``np.ndarray``) with shape (H, W)
                In some cases, when a dataset does not provide disparities, the ``disparities`` and
                ``valid_masks`` can be Tuples containing None values.
                For training splits generally the datasets provide a minimal guarantee of
                images: (``PIL.Image``, ``PIL.Image``)
                disparities: (``np.ndarray``, ``None``) with shape (1, H, W)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``np.ndarray | None``, ``None``) with shape (H, W)
                For some test splits, the datasets provides outputs that look like:
                images: (``PIL.Image``, ``PIL.Image``)
                disparities: (``None``, ``None``)
                Optionally, based on the dataset, it can return a ``mask`` as well:
                valid_masks: (``None``, ``None``)
        """
        super().__init__(root=root)
        self.transforms = transforms

        # Filled in by subclasses with (left, right) path pairs.
        self._images = []  # type: ignore
        self._disparities = []  # type: ignore

    def _read_img(self, file_path: Union[str, Path]) -> Image.Image:
        """Open ``file_path`` with PIL and return it in RGB mode."""
        image = Image.open(file_path)
        if image.mode == "RGB":
            return image
        return image.convert("RGB")  # type: ignore [return-value]

    def _scan_pairs(
        self,
        paths_left_pattern: str,
        paths_right_pattern: Optional[str] = None,
    ) -> List[Tuple[str, Optional[str]]]:
        """Glob both patterns and zip the sorted matches into (left, right) pairs.

        When ``paths_right_pattern`` is falsy, every left path is paired with ``None``.

        Raises:
            FileNotFoundError: if either pattern matches no file.
            ValueError: if the two patterns match a different number of files.
        """
        left_paths = sorted(glob(paths_left_pattern))

        right_paths: List[Union[None, str]]
        if paths_right_pattern:
            right_paths = list(sorted(glob(paths_right_pattern)))
        else:
            right_paths = [None] * len(left_paths)

        if not left_paths:
            raise FileNotFoundError(f"Could not find any files matching the patterns: {paths_left_pattern}")

        if not right_paths:
            raise FileNotFoundError(f"Could not find any files matching the patterns: {paths_right_pattern}")

        if len(left_paths) != len(right_paths):
            raise ValueError(
                f"Found {len(left_paths)} left files but {len(right_paths)} right files using:\n "
                f"left pattern: {paths_left_pattern}\n"
                f"right pattern: {paths_right_pattern}\n"
            )

        return list(zip(left_paths, right_paths))

    @abstractmethod
    def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
        # function that returns a disparity map and an occlusion map
        pass

    def __getitem__(self, index: int) -> Union[T1, T2]:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3 or 4-tuple with ``(img_left, img_right, disparity, Optional[valid_mask])`` where ``valid_mask``
                can be a numpy boolean mask of shape (H, W) if the dataset provides a file
                indicating which disparity pixels are valid. The disparity is a numpy array of
                shape (1, H, W) and the images are PIL images. ``disparity`` is None for
                datasets on which for ``split="test"`` the authors did not provide annotations.
        """
        # Images are read before disparities, matching the order of the stored pairs.
        imgs = (
            self._read_img(self._images[index][0]),
            self._read_img(self._images[index][1]),
        )

        left_disparity, left_mask = self._read_disparity(self._disparities[index][0])
        right_disparity, right_mask = self._read_disparity(self._disparities[index][1])

        dsp_maps = (left_disparity, right_disparity)
        valid_masks = (left_mask, right_mask)

        if self.transforms is not None:
            imgs, dsp_maps, valid_masks = self.transforms(imgs, dsp_maps, valid_masks)

        # Only the left disparity / mask are exposed to the caller.
        if not self._has_built_in_disparity_mask and valid_masks[0] is None:
            return imgs[0], imgs[1], dsp_maps[0]
        return imgs[0], imgs[1], dsp_maps[0], cast(np.ndarray, valid_masks[0])

    def __len__(self) -> int:
        return len(self._images)
class CarlaStereo(StereoMatchingDataset):
    """
    Carla simulator data linked in the `CREStereo github repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            carla-highres
                trainingF
                    scene1
                        im0.png
                        im1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    scene2
                        im0.png
                        im1.png
                        disp0GT.pfm
                        disp1GT.pfm
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where `carla-highres` is located.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: Union[str, Path], transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "carla-highres"

        # The image files are named im0.png / im1.png (left / right view).
        left_image_pattern = str(root / "trainingF" / "*" / "im0.png")
        right_image_pattern = str(root / "trainingF" / "*" / "im1.png")
        self._images = self._scan_pairs(left_image_pattern, right_image_pattern)

        left_disparity_pattern = str(root / "trainingF" / "*" / "disp0GT.pfm")
        right_disparity_pattern = str(root / "trainingF" / "*" / "disp1GT.pfm")
        self._disparities = self._scan_pairs(left_disparity_pattern, right_disparity_pattern)

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Read a PFM disparity file; this dataset has no validity mask, so ``None`` is returned for it."""
        disparity_map = _read_pfm_file(file_path)
        disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
class Kitti2012Stereo(StereoMatchingDataset):
    """
    KITTI dataset from the `2012 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_stereo_flow.php>`_.
    Uses the RGB images for consistency with KITTI 2015.

    The dataset is expected to have the following structure: ::

        root
            Kitti2012
                testing
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                training
                    colored_0
                        1_10.png
                        2_10.png
                        ...
                    colored_1
                        1_10.png
                        2_10.png
                        ...
                    disp_noc
                        1.png
                        2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2012` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test"))

        # "train" -> "training", "test" -> "testing"
        root = Path(root) / "Kitti2012" / (split + "ing")

        self._images = self._scan_pairs(
            str(root / "colored_0" / "*_10.png"),
            str(root / "colored_1" / "*_10.png"),
        )

        if split != "train":
            # no ground truth is scanned for the test split
            self._disparities = [(None, None) for _ in self._images]
        else:
            # only left disparities are scanned; the right entry of each pair is None
            self._disparities = self._scan_pairs(str(root / "disp_noc" / "*.png"), None)

    def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], None]:
        """Load a disparity PNG, divide it by 256 and add a leading channel axis."""
        # test split has no disparity maps
        if file_path is None:
            return None, None

        raw = np.asarray(Image.open(file_path)) / 256.0
        # unsqueeze the disparity map into (C, H, W) format
        return raw[None, :, :], None

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
            Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
        """
        return cast(T1, super().__getitem__(index))
class Kitti2015Stereo(StereoMatchingDataset):
    """
    KITTI dataset from the `2015 stereo evaluation benchmark <http://www.cvlibs.net/datasets/kitti/eval_scene_flow.php>`_.

    The dataset is expected to have the following structure: ::

        root
            Kitti2015
                testing
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                training
                    image_2
                        img1.png
                        img2.png
                        ...
                    image_3
                        img1.png
                        img2.png
                        ...
                    disp_occ_0
                        img1.png
                        img2.png
                        ...
                    disp_occ_1
                        img1.png
                        img2.png
                        ...
                    calib

    Args:
        root (str or ``pathlib.Path``): Root directory where `Kitti2015` is located.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test"))

        # "train" -> "training", "test" -> "testing"
        root = Path(root) / "Kitti2015" / (split + "ing")

        self._images = self._scan_pairs(
            str(root / "image_2" / "*.png"),
            str(root / "image_3" / "*.png"),
        )

        if split != "train":
            # no ground truth is scanned for the test split
            self._disparities = [(None, None) for _ in self._images]
        else:
            self._disparities = self._scan_pairs(
                str(root / "disp_occ_0" / "*.png"),
                str(root / "disp_occ_1" / "*.png"),
            )

    def _read_disparity(self, file_path: str) -> Tuple[Optional[np.ndarray], None]:
        """Load a disparity PNG, divide it by 256 and add a leading channel axis."""
        # test split has no disparity maps
        if file_path is None:
            return None, None

        raw = np.asarray(Image.open(file_path)) / 256.0
        # unsqueeze the disparity map into (C, H, W) format
        return raw[None, :, :], None

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
            Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
        """
        return cast(T1, super().__getitem__(index))
class Middlebury2014Stereo(StereoMatchingDataset):
    """Publicly available scenes from the Middlebury dataset `2014 version <https://vision.middlebury.edu/stereo/data/scenes2014/>`_.

    The dataset mostly follows the original format, without containing the ambient subdirectories.  : ::

        root
            Middlebury2014
                train
                    scene1-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    scene2-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    ...
                additional
                    scene1-{perfect,imperfect}
                        calib.txt
                        im{0,1}.png
                        im1E.png
                        im1L.png
                        disp{0,1}.pfm
                        disp{0,1}-n.png
                        disp{0,1}-sd.pfm
                        disp{0,1}y.pfm
                    ...
                test
                    scene1
                        calib.txt
                        im{0,1}.png
                    scene2
                        calib.txt
                        im{0,1}.png
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the Middlebury 2014 Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default), "test", or "additional"
        use_ambient_views (boolean, optional): Whether to use different exposure or lighting views when possible.
            The dataset samples with equal probability between ``[im1.png, im1E.png, im1L.png]``.
        calibration (string, optional): Whether or not to use the calibrated (default) or uncalibrated scenes.
            Must be ``None`` for ``split="test"`` and one of "perfect", "imperfect" or "both" otherwise.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
        download (boolean, optional): Whether or not to download the dataset in the ``root`` directory.
    """

    splits = {
        "train": [
            "Adirondack",
            "Jadeplant",
            "Motorcycle",
            "Piano",
            "Pipes",
            "Playroom",
            "Playtable",
            "Recycle",
            "Shelves",
            "Vintage",
        ],
        "additional": [
            "Backpack",
            "Bicycle1",
            "Cable",
            "Classroom1",
            "Couch",
            "Flowers",
            "Mask",
            "Shopvac",
            "Sticks",
            "Storage",
            "Sword1",
            "Sword2",
            "Umbrella",
        ],
        "test": [
            "Plants",
            "Classroom2E",
            "Classroom2",
            "Australia",
            "DjembeL",
            "CrusadeP",
            "Crusade",
            "Hoops",
            "Bicycle2",
            "Staircase",
            "Newkuba",
            "AustraliaP",
            "Djembe",
            "Livingroom",
            "Computer",
        ],
    }

    _has_built_in_disparity_mask = True

    def __init__(
        self,
        root: Union[str, Path],
        split: str = "train",
        calibration: Optional[str] = "perfect",
        use_ambient_views: bool = False,
        transforms: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test", "additional"))
        self.split = split

        if calibration:
            verify_str_arg(calibration, "calibration", valid_values=("perfect", "imperfect", "both", None))  # type: ignore
            if split == "test":
                raise ValueError("Split 'test' has only no calibration settings, please set `calibration=None`.")
        else:
            if split != "test":
                raise ValueError(
                    f"Split '{split}' has calibration settings, however None was provided as an argument."
                    f"\nSetting calibration to 'perfect' for split '{split}'. Available calibration settings are: 'perfect', 'imperfect', 'both'.",
                )

        if download:
            self._download_dataset(root)

        root = Path(root) / "Middlebury2014"

        if not os.path.exists(root / split):
            raise FileNotFoundError(f"The {split} directory was not found in the provided root directory")

        # check that the provided root folder contains the scene splits;
        # startswith accounts for the "-perfect" / "-imperfect" calibration suffixes
        scene_prefixes = tuple(self.splits[split])
        if not any(scene.startswith(scene_prefixes) for scene in os.listdir(root / split)):
            raise FileNotFoundError(f"Provided root folder does not contain any scenes from the {split} split.")

        calibration_suffixes = {
            None: [""],
            "perfect": ["-perfect"],
            "imperfect": ["-imperfect"],
            "both": ["-perfect", "-imperfect"],
        }[calibration]

        for calibration_suffix in calibration_suffixes:
            scene_pattern = "*" + calibration_suffix
            left_img_pattern = str(root / split / scene_pattern / "im0.png")
            right_img_pattern = str(root / split / scene_pattern / "im1.png")
            self._images += self._scan_pairs(left_img_pattern, right_img_pattern)

            if split == "test":
                # the test split has no ground truth disparities
                self._disparities = [(None, None) for _ in self._images]
            else:
                left_disparity_pattern = str(root / split / scene_pattern / "disp0.pfm")
                right_disparity_pattern = str(root / split / scene_pattern / "disp1.pfm")
                self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern)

        self.use_ambient_views = use_ambient_views

    def _read_img(self, file_path: Union[str, Path]) -> Image.Image:
        """
        Function that reads either the original right image or an augmented view when ``use_ambient_views`` is True.
        When ``use_ambient_views`` is True, the dataset will return at random one of ``[im1.png, im1E.png, im1L.png]``
        as the right image.
        """
        ambient_file_paths: List[Union[str, Path]]  # make mypy happy

        if not isinstance(file_path, Path):
            file_path = Path(file_path)

        if file_path.name == "im1.png" and self.use_ambient_views:
            base_path = file_path.parent
            # keep only the ambient views that actually exist on disk
            ambient_file_paths = [
                base_path / view_name
                for view_name in ["im1E.png", "im1L.png"]
                if os.path.exists(base_path / view_name)
            ]
            # keep the original image as an option as well for uniform sampling between base views
            ambient_file_paths.append(file_path)
            file_path = random.choice(ambient_file_paths)  # type: ignore
        return super()._read_img(file_path)

    def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]:
        """Read a PFM disparity map and derive the validity mask from its strictly positive entries."""
        # test split has no disparity maps
        if file_path is None:
            return None, None

        disparity_map = _read_pfm_file(file_path)
        disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
        disparity_map[disparity_map == np.inf] = 0  # remove infinite disparities
        valid_mask = (disparity_map > 0).squeeze(0)  # mask out invalid disparities
        return disparity_map, valid_mask

    def _download_dataset(self, root: Union[str, Path]) -> None:
        """Download the scenes of ``self.split`` that are missing under ``root / "Middlebury2014"``.

        For "train" / "additional" each scene is fetched in both calibration settings.
        For "test" a single archive is fetched and its ``testF`` scenes are moved into ``test``.
        Scenes that already exist on disk are left untouched, so the call is safe to repeat.
        """
        base_url = "https://vision.middlebury.edu/stereo/data/scenes2014/zip"
        # train and additional splits have 2 different calibration settings
        root = Path(root) / "Middlebury2014"
        split_name = self.split

        if split_name != "test":
            split_root = root / split_name
            for split_scene in self.splits[split_name]:
                for calibration in ["perfect", "imperfect"]:
                    scene_name = f"{split_scene}-{calibration}"
                    scene_url = f"{base_url}/{scene_name}.zip"
                    # download the scene only if it doesn't exist
                    if not (split_root / scene_name).exists():
                        download_and_extract_archive(
                            url=scene_url,
                            filename=f"{scene_name}.zip",
                            download_root=str(split_root),
                            remove_finished=True,
                        )
            return

        test_root = root / "test"
        # exist_ok=True so that calling with download=True a second time does not raise FileExistsError
        os.makedirs(test_root, exist_ok=True)

        # list the directory once instead of once per expected scene
        present_scenes = set(os.listdir(test_root))
        if all(scene in present_scenes for scene in self.splits["test"]):
            return

        # test split is downloaded from a different location
        test_set_url = "https://vision.middlebury.edu/stereo/submit3/zip/MiddEval3-data-F.zip"
        # the unzip is going to produce a directory MiddEval3 with two subdirectories trainingF and testF
        # we want to move the contents from testF into the test directory
        download_and_extract_archive(url=test_set_url, download_root=str(root), remove_finished=True)

        extracted_root = root / "MiddEval3"
        # snapshot the scene directories before moving them, so we never iterate a tree that is being modified
        with os.scandir(extracted_root / "testF") as entries:
            scene_src_dirs = [entry.path for entry in entries if entry.is_dir()]

        for scene_src_dir in scene_src_dirs:
            # shutil.move refuses to overwrite an existing destination; keep scenes that are already there
            if os.path.basename(scene_src_dir) in present_scenes:
                continue
            shutil.move(scene_src_dir, str(test_root))

        # cleanup MiddEval3 directory
        shutil.rmtree(str(extracted_root))

    def __getitem__(self, index: int) -> T2:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` for `split=test`.
        """
        return cast(T2, super().__getitem__(index))
class CREStereo(StereoMatchingDataset):
    """Synthetic dataset used in training the `CREStereo <https://arxiv.org/pdf/2203.11483.pdf>`_ architecture.
    Dataset details on the official paper `repo <https://github.com/megvii-research/CREStereo>`_.

    The dataset is expected to have the following structure: ::

        root
            CREStereo
                tree
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    img2_left.jpg
                    img2_right.jpg
                    img2_left.disp.png
                    img2_right.disp.png
                    ...
                shapenet
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    ...
                reflective
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    ...
                hole
                    img1_left.jpg
                    img1_right.jpg
                    img1_left.disp.png
                    img1_right.disp.png
                    ...

    Args:
        root (str): Root directory of the dataset.
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    _has_built_in_disparity_mask = True

    def __init__(
        self,
        root: Union[str, Path],
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "CREStereo"

        dirs = ["shapenet", "reflective", "tree", "hole"]

        for s in dirs:
            left_image_pattern = str(root / s / "*_left.jpg")
            right_image_pattern = str(root / s / "*_right.jpg")
            self._images += self._scan_pairs(left_image_pattern, right_image_pattern)

            # disparities are stored as PNG files next to the JPEG images
            left_disparity_pattern = str(root / s / "*_left.disp.png")
            right_disparity_pattern = str(root / s / "*_right.disp.png")
            self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern)

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Load a disparity PNG as float32, add a leading channel axis and divide by 32."""
        disparity_map = np.asarray(Image.open(file_path), dtype=np.float32)
        # unsqueeze the disparity map into (C, H, W) format
        disparity_map = disparity_map[None, :, :] / 32.0
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
            generate a valid mask.
        """
        return cast(T1, super().__getitem__(index))
class FallingThingsStereo(StereoMatchingDataset):
    """`FallingThings <https://research.nvidia.com/publication/2018-06_falling-things-synthetic-dataset-3d-object-detection-and-pose-estimation>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            FallingThings
                single
                    dir1
                        scene1
                            _object_settings.json
                            _camera_settings.json
                            image1.left.depth.png
                            image1.right.depth.png
                            image1.left.jpg
                            image1.right.jpg
                            image2.left.depth.png
                            image2.right.depth.png
                            image2.left.jpg
                            image2.right
                            ...
                        scene2
                    ...
                mixed
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right
                        ...
                    scene2
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where FallingThings is located.
        variant (string): Which variant to use. Either "single", "mixed", or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: Union[str, Path], variant: str = "single", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "FallingThings"

        verify_str_arg(variant, "variant", valid_values=("single", "mixed", "both"))

        variants = {
            "single": ["single"],
            "mixed": ["mixed"],
            "both": ["single", "mixed"],
        }[variant]

        # "single" has one more directory level than "mixed"
        split_prefix = {
            "single": Path("*") / "*",
            "mixed": Path("*"),
        }

        for s in variants:
            scene_glob = root / s / split_prefix[s]

            self._images += self._scan_pairs(
                str(scene_glob / "*.left.jpg"),
                str(scene_glob / "*.right.jpg"),
            )
            self._disparities += self._scan_pairs(
                str(scene_glob / "*.left.depth.png"),
                str(scene_glob / "*.right.depth.png"),
            )

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Convert a depth PNG into a disparity map using ``fx`` from the sibling ``_camera_settings.json``."""
        # (H, W) image
        depth = np.asarray(Image.open(file_path))

        # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt
        # in order to extract disparity from depth maps
        camera_settings_path = Path(file_path).parent / "_camera_settings.json"
        with open(camera_settings_path, "r") as f:
            intrinsics = json.load(f)

        # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constant)
        focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]
        baseline, pixel_constant = 6, 100  # pixel constant is inverted
        disparity_map = (baseline * focal * pixel_constant) / depth.astype(np.float32)

        # unsqueeze disparity to (C, H, W)
        return disparity_map[None, :, :], None

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
class SceneFlowStereo(StereoMatchingDataset):
    """Dataset interface for `Scene Flow <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ datasets.
    This interface provides access to the `FlyingThings3D`, `Monkaa` and `Driving` datasets.

    The dataset is expected to have the following structure: ::

        root
            SceneFlow
                Monkaa
                    frames_cleanpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        scene2
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                    frames_finalpass
                        scene1
                            left
                                img1.png
                                img2.png
                            right
                                img1.png
                                img2.png
                        ...
                        ...
                    disparity
                        scene1
                            left
                                img1.pfm
                                img2.pfm
                            right
                                img1.pfm
                                img2.pfm
                FlyingThings3D
                    ...
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where SceneFlow is located.
        variant (string): Which dataset variant to use, "FlyingThings3D" (default), "Monkaa" or "Driving".
        pass_name (string): Which pass to use, "clean" (default), "final" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.

    """

    def __init__(
        self,
        root: Union[str, Path],
        variant: str = "FlyingThings3D",
        pass_name: str = "clean",
        transforms: Optional[Callable] = None,
    ) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "SceneFlow"

        verify_str_arg(variant, "variant", valid_values=("FlyingThings3D", "Driving", "Monkaa"))
        verify_str_arg(pass_name, "pass_name", valid_values=("clean", "final", "both"))

        passes = {
            "clean": ["frames_cleanpass"],
            "final": ["frames_finalpass"],
            "both": ["frames_cleanpass", "frames_finalpass"],
        }[pass_name]

        root = root / variant

        # number of wildcard directory levels between the pass folder and left/right
        prefix_directories = {
            "Monkaa": Path("*"),
            "FlyingThings3D": Path("*") / "*" / "*",
            "Driving": Path("*") / "*" / "*",
        }
        scene_glob = prefix_directories[variant]

        for p in passes:
            self._images += self._scan_pairs(
                str(root / p / scene_glob / "left" / "*.png"),
                str(root / p / scene_glob / "right" / "*.png"),
            )
            # the same disparity folder is scanned once per pass
            self._disparities += self._scan_pairs(
                str(root / "disparity" / scene_glob / "left" / "*.pfm"),
                str(root / "disparity" / scene_glob / "right" / "*.pfm"),
            )

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Read a PFM disparity file and return its absolute value together with a ``None`` mask."""
        # ensure that the disparity is positive
        return np.abs(_read_pfm_file(file_path)), None

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
class SintelStereo(StereoMatchingDataset):
    """Sintel `Stereo Dataset <http://sintel.is.tue.mpg.de/stereo>`_.

    The dataset is expected to have the following structure: ::

        root
            Sintel
                training
                    final_left
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    final_right
                        scene2
                            img1.png
                            img2.png
                            ...
                        ...
                    disparities
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    occlusions
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...
                    outofframe
                        scene1
                            img1.png
                            img2.png
                            ...
                        ...

    Args:
        root (str or ``pathlib.Path``): Root directory where Sintel Stereo is located.
        pass_name (string): The name of the pass to use, either "final", "clean" or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], pass_name: str = "final", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(pass_name, "pass_name", valid_values=("final", "clean", "both"))

        root = Path(root) / "Sintel"
        pass_names = {
            "final": ["final"],
            "clean": ["clean"],
            "both": ["final", "clean"],
        }[pass_name]

        training_root = root / "training"
        for p in pass_names:
            self._images += self._scan_pairs(
                str(training_root / f"{p}_left" / "*" / "*.png"),
                str(training_root / f"{p}_right" / "*" / "*.png"),
            )
            # only one set of disparities is scanned; the right entry of each pair is None
            self._disparities += self._scan_pairs(str(training_root / "disparities" / "*" / "*.png"), None)

    def _get_occlussion_mask_paths(self, file_path: str) -> Tuple[str, str]:
        """Map a disparity file path to its occlusion and out-of-frame mask paths.

        Raises:
            FileNotFoundError: if either mask file does not exist.
        """
        # a path will look like  .../.../.../training/disparities/scene1/img1.png
        # we want to get something like .../.../.../training/occlusions/scene1/img1.png
        disparity_file = Path(file_path)
        scene_name = disparity_file.parent.name
        # two levels up from the scene directory: scene dir -> disparities dir -> sample dir
        sample_dir = disparity_file.parent.parent.parent

        occlusion_path = str(sample_dir / "occlusions" / scene_name / disparity_file.name)
        outofframe_path = str(sample_dir / "outofframe" / scene_name / disparity_file.name)

        if not os.path.exists(occlusion_path):
            raise FileNotFoundError(f"Occlusion mask {occlusion_path} does not exist")

        if not os.path.exists(outofframe_path):
            raise FileNotFoundError(f"Out of frame mask {outofframe_path} does not exist")

        return occlusion_path, outofframe_path

    def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]:
        """Decode the RGB-encoded disparity and combine the occlusion and out-of-frame masks."""
        if file_path is None:
            return None, None

        # disparity decoding as per Sintel instructions in the README provided with the dataset
        encoded = np.asarray(Image.open(file_path), dtype=np.float32)
        r, g, b = np.split(encoded, 3, axis=-1)
        decoded = r * 4 + g / (2**6) + b / (2**14)
        # reshape into (C, H, W) format
        disparity_map = np.transpose(decoded, (2, 0, 1))

        # find the appropriate file paths
        occlusion_path, outofframe_path = self._get_occlussion_mask_paths(file_path)
        # a pixel is valid when both mask images are zero there
        not_occluded = np.asarray(Image.open(occlusion_path)) == 0
        in_frame = np.asarray(Image.open(outofframe_path)) == 0
        return disparity_map, np.logical_and(in_frame, not_occluded)

    def __getitem__(self, index: int) -> T2:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images whilst
            the valid_mask is a numpy array of shape (H, W).
        """
        return cast(T2, super().__getitem__(index))
class InStereo2k(StereoMatchingDataset):
    """`InStereo2k <https://github.com/YuhuaXu/StereoDataset>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            InStereo2k
                train
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                    ...
                test
                    scene1
                        left.png
                        right.png
                        left_disp.png
                        right_disp.png
                        ...
                    scene2
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory where InStereo2k is located.
        split (string): Either "train" or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        root = Path(root) / "InStereo2k" / split

        verify_str_arg(split, "split", valid_values=("train", "test"))

        scene_glob = root / "*"

        self._images = self._scan_pairs(
            str(scene_glob / "left.png"),
            str(scene_glob / "right.png"),
        )
        self._disparities = self._scan_pairs(
            str(scene_glob / "left_disp.png"),
            str(scene_glob / "right_disp.png"),
        )

    def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
        """Load a disparity PNG as float32, add a leading channel axis and divide by 1024."""
        raw = np.asarray(Image.open(file_path), dtype=np.float32)
        # unsqueeze disparity to (C, H, W)
        return raw[None, :, :] / 1024.0, None

    def __getitem__(self, index: int) -> T1:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return cast(T1, super().__getitem__(index))
class ETH3DStereo(StereoMatchingDataset):
    """ETH3D `Low-Res Two-View <https://www.eth3d.net/datasets>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            ETH3D
                two_view_training
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...
                two_view_training_gt
                    scene1
                        disp0GT.pfm
                        mask0nocc.png
                    scene2
                        disp0GT.pfm
                        mask0nocc.png
                    ...
                two_view_test
                    scene1
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    scene2
                        im1.png
                        im0.png
                        images.txt
                        cameras.txt
                        calib.txt
                    ...

    Args:
        root (str or ``pathlib.Path``): Root directory of the ETH3D Dataset.
        split (string, optional): The dataset split of scenes, either "train" (default) or "test".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    _has_built_in_disparity_mask = True

    def __init__(self, root: Union[str, Path], split: str = "train", transforms: Optional[Callable] = None) -> None:
        super().__init__(root, transforms)

        verify_str_arg(split, "split", valid_values=("train", "test"))

        root = Path(root) / "ETH3D"

        # the test images are read from "two_view_test"
        img_dir = "two_view_training" if split == "train" else "two_view_test"
        anot_dir = "two_view_training_gt"

        left_img_pattern = str(root / img_dir / "*" / "im0.png")
        right_img_pattern = str(root / img_dir / "*" / "im1.png")
        self._images = self._scan_pairs(left_img_pattern, right_img_pattern)

        if split == "test":
            # no ground truth is scanned for the test split
            self._disparities = [(None, None) for _ in self._images]
        else:
            # only the left disparity is scanned; the right entry of each pair is None
            disparity_pattern = str(root / anot_dir / "*" / "disp0GT.pfm")
            self._disparities = self._scan_pairs(disparity_pattern, None)

    def _read_disparity(self, file_path: str) -> Union[Tuple[None, None], Tuple[np.ndarray, np.ndarray]]:
        """Read a PFM disparity map and the ``mask0nocc.png`` mask stored in the same directory."""
        # test split has no disparity maps
        if file_path is None:
            return None, None

        disparity_map = _read_pfm_file(file_path)
        disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
        mask_path = Path(file_path).parent / "mask0nocc.png"
        valid_mask = np.asarray(Image.open(mask_path)).astype(bool)
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> T2:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            For the train split ``valid_mask`` is a boolean numpy array read from ``mask0nocc.png``.
            Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test,
            unless the ``transforms`` parameter generates them.
        """
        return cast(T2, super().__getitem__(index))
Docs
Access comprehensive developer documentation for PyTorch
To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: Cookies Policy.