From efcf0cb5798e80c8ec1aebc1f70634371de8b8d3 Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Thu, 6 Jan 2022 13:19:55 +0530 Subject: [PATCH 01/15] [WIP] *added stanford_cars --- torchvision/datasets/standford_cars.py | 129 +++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 torchvision/datasets/standford_cars.py diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py new file mode 100644 index 00000000000..5848bd569f1 --- /dev/null +++ b/torchvision/datasets/standford_cars.py @@ -0,0 +1,129 @@ +import os +import os.path +from typing import Callable, Optional + +import scipy.io as io +from PIL import Image + +from .utils import download_and_extract_archive, verify_str_arg, download_url +from .vision import VisionDataset + + +class StanfordCars(VisionDataset): + """`Stanford Cars `_ Dataset + + .. warning:: + + This class needs `scipy `_ to load target files from `.mat` format. + + Args: + root (string): Root directory of dataset + train (bool, optional): + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it.""" + + urls = ( + "https://ai.stanford.edu/~jkrause/car196/cars_test.tgz", + "https://ai.stanford.edu/~jkrause/car196/cars_train.tgz", + ) + + md5s = ("4ce7ebf6a94d07f1952d94dd34c4d501", "065e5b463ae28d29e77c1b4b166cfe61") + + annot_urls = ( + "https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat", + "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz", + ) + extension = ".jpg" + + def __init__( + self, + root: str, + train: bool = True, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + download: bool = False, + ) -> None: + super().__init__(root, transform=transform, target_transform=target_transform) + self._download = download + self.root = os.path.expanduser(root) + self.train = train + self.url = self.urls[self.train] + + if self._download: + self.download() + + if not self._check_exists(): + raise RuntimeError("Dataset not found. You can use download=True to download it") + + self.class_names = self._get_class_names() + self.annotations = self._get_annotations() + + def _get_class_names(self): + """ + Returns Mapping of class ids to class names in form of Dictionary + """ + meta_data = io.loadmat(os.path.join(self.root, "devkit/cars_meta.mat")) + class_names = meta_data["class_names"][0] + return { + # Format class names appropriately for directory creation. 
+ class_name[0].replace(" ", "_").replace("/", "_"): i + for i, class_name in enumerate(class_names) + } + + def _get_annotations(self): + """ + Returns Annotations for training data and testing data + """ + annotations = None + if self.train: + annotations = io.loadmat(os.path.join(self.root, "devkit/cars_train_annos.mat")) + else: + annotations = io.loadmat(os.path.join(self.root, "cars_test_annos_withlabels.mat")) + return annotations["annotations"][0] + + def __len__(self): + return self.annotations.shape[0] + + def __getitem__(self, idx: int): + """Returns pil_image and class_id for given index""" + target_id = self.annotations[idx][4][0, 0] + image_name = self.annotations[idx][5][0] + # 0,1,2,3 correspond to bounding boxes + + image_path = os.path.join(self.root, f"cars_{'train' if self.train else 'test' }", image_name) + # Beware: Stanford cars targets starts at 1 + target_id = target_id - 1 + pil_image = Image.open(image_path).convert("RGB") + + if self.transform is not None: + pil_image = self.transform(pil_image) + if self.target_transform is not None: + pil_image = self.target_transform(pil_image) + return pil_image, target_id + + + def download(self) -> None: + if self._check_exists(): + return + else: + download_and_extract_archive( + url=self.urls[self.train], + download_root=self.root, + extract_root=self.root, + md5=self.md5s[self.train] + ) + download_and_extract_archive( + url=self.annot_urls[1], download_root=self.root, extract_root=self.root ,md5="c3b158d763b6e2245038c8ad08e45376" + ) + if not self.train: + download_url( + url=self.annot_urls[0], filename="cars_test_annos_withlabels.mat", root=self.root ,md5="b0a2b23655a3edd16d84508592a98d10" + ) + def _check_exists(self) -> bool: + return os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.exists(os.path.join(self.root,"devkit/cars_meta.mat")) + From 62ca7bf9950589fb5be1f004764f5522e108ec63 Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Thu, 6 Jan 2022 13:23:07 +0530 Subject: [PATCH 02/15] [WIP] added stanfordCars to docs --- docs/source/datasets.rst | 1 + torchvision/datasets/__init__.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 8a71142f9e7..5b903aa3873 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -68,6 +68,7 @@ You can also create your own datasets using the provided :ref:`base classes Date: Thu, 6 Jan 2022 13:41:57 +0530 Subject: [PATCH 03/15] [WIP] minor edits --- torchvision/datasets/standford_cars.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py index 5848bd569f1..77156e06c7f 100644 --- a/torchvision/datasets/standford_cars.py +++ b/torchvision/datasets/standford_cars.py @@ -63,7 +63,7 @@ def __init__( self.class_names = self._get_class_names() self.annotations = self._get_annotations() - def _get_class_names(self): + def _get_class_names(self) -> dict: """ Returns Mapping of class ids to class names in form of Dictionary """ @@ -86,10 +86,10 @@ def _get_annotations(self): annotations = io.loadmat(os.path.join(self.root, "cars_test_annos_withlabels.mat")) return annotations["annotations"][0] - def __len__(self): + def __len__(self) -> int: return self.annotations.shape[0] - def __getitem__(self, idx: int): + def __getitem__(self, 
idx: int) -> (Image,int): """Returns pil_image and class_id for given index""" target_id = self.annotations[idx][4][0, 0] image_name = self.annotations[idx][5][0] From 52cd5b9a9a9d3d2f7ec66d43e2e0f7254f7bdeea Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Thu, 6 Jan 2022 14:07:05 +0530 Subject: [PATCH 04/15] [WIP] minor edits --- torchvision/datasets/standford_cars.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py index 77156e06c7f..17bcd00da98 100644 --- a/torchvision/datasets/standford_cars.py +++ b/torchvision/datasets/standford_cars.py @@ -5,7 +5,7 @@ import scipy.io as io from PIL import Image -from .utils import download_and_extract_archive, verify_str_arg, download_url +from .utils import download_and_extract_archive, download_url from .vision import VisionDataset @@ -125,5 +125,4 @@ def download(self) -> None: url=self.annot_urls[0], filename="cars_test_annos_withlabels.mat", root=self.root ,md5="b0a2b23655a3edd16d84508592a98d10" ) def _check_exists(self) -> bool: - return os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.exists(os.path.join(self.root,"devkit/cars_meta.mat")) - + return os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.exists(os.path.join(self.root,"devkit/cars_meta.mat")) \ No newline at end of file From 346036eb462bacb84476eafc8ba36e6733c9b3bd Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 8 Jan 2022 10:59:10 +0530 Subject: [PATCH 05/15] edited StanfordCars class --- test/test_datasets.py | 7 ++ torchvision/datasets/standford_cars.py | 91 +++++++++++++++----------- 2 files changed, 59 insertions(+), 39 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 5b5056e7ebe..c89eb1e8262 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2274,6 +2274,13 @@ def inject_fake_data(self, tmpdir, config): return num_samples +class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StanfordCars + REQUIRED_PACKAGES = ("scipy",) + FEATURE_TYPES = (PIL.Image.Image, int) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True,False)) + + if __name__ == "__main__": unittest.main() diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py index 17bcd00da98..3d13e5b07b3 100644 --- a/torchvision/datasets/standford_cars.py +++ b/torchvision/datasets/standford_cars.py @@ -1,8 +1,6 @@ import os import os.path from typing import Callable, Optional - -import scipy.io as io from PIL import Image from .utils import download_and_extract_archive, download_url @@ -19,85 +17,96 @@ class StanfordCars(VisionDataset): Args: root (string): Root directory of dataset train (bool, optional): - download (bool, optional): If True, downloads the dataset from the internet and - puts it in root directory. If dataset is already downloaded, it is not - downloaded again. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. 
E.g, ``transforms.RandomCrop`` target_transform (callable, optional): A function/transform that takes in the - target and transforms it.""" + target and transforms it. + download (bool, optional): If True, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again.""" urls = ( "https://ai.stanford.edu/~jkrause/car196/cars_test.tgz", "https://ai.stanford.edu/~jkrause/car196/cars_train.tgz", ) - md5s = ("4ce7ebf6a94d07f1952d94dd34c4d501", "065e5b463ae28d29e77c1b4b166cfe61") + md5s = ("4ce7ebf6a94d07f1952d94dd34c4d501", "065e5b463ae28d29e77c1b4b166cfe61") # md5checksum for test and train annot_urls = ( "https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat", "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz", ) - extension = ".jpg" + annot_md5s = ( + "b0a2b23655a3edd16d84508592a98d10", + "c3b158d763b6e2245038c8ad08e45376", + ) def __init__( - self, - root: str, - train: bool = True, - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, - download: bool = False, + self, + root: str, + train: bool = True, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + download: bool = False, ) -> None: + + try: + from scipy.io import loadmat + + self._loadmat = loadmat + except ImportError: + raise RuntimeError("Scipy is not found. This dataset needs to have scipy installed: pip install scipy") + super().__init__(root, transform=transform, target_transform=target_transform) - self._download = download - self.root = os.path.expanduser(root) + self.train = train - self.url = self.urls[self.train] - if self._download: + if download: self.download() if not self._check_exists(): raise RuntimeError("Dataset not found. You can use download=True to download it") - self.class_names = self._get_class_names() - self.annotations = self._get_annotations() + self._samples = self._make_dataset() + self.classes = self._get_class_names() # class_id to class_name mapping def _get_class_names(self) -> dict: """ Returns Mapping of class ids to class names in form of Dictionary """ - meta_data = io.loadmat(os.path.join(self.root, "devkit/cars_meta.mat")) + meta_data = self._loadmat(os.path.join(self.root, "devkit/cars_meta.mat")) class_names = meta_data["class_names"][0] return { - # Format class names appropriately for directory creation. 
class_name[0].replace(" ", "_").replace("/", "_"): i for i, class_name in enumerate(class_names) } - def _get_annotations(self): + def _make_dataset(self): """ Returns Annotations for training data and testing data """ annotations = None if self.train: - annotations = io.loadmat(os.path.join(self.root, "devkit/cars_train_annos.mat")) + annotations = self._loadmat(os.path.join(self.root, "devkit/cars_train_annos.mat")) else: - annotations = io.loadmat(os.path.join(self.root, "cars_test_annos_withlabels.mat")) - return annotations["annotations"][0] + annotations = self._loadmat(os.path.join(self.root, "cars_test_annos_withlabels.mat")) + samples = [] + annotations = annotations["annotations"][0] + for index in range(len(annotations)): + target = annotations[index][4][0, 0] + image_file = annotations[index][5][0] + samples.append((image_file, target)) + return samples def __len__(self) -> int: - return self.annotations.shape[0] + return len(self._samples) - def __getitem__(self, idx: int) -> (Image,int): + def __getitem__(self, idx: int) -> (Image, int): """Returns pil_image and class_id for given index""" - target_id = self.annotations[idx][4][0, 0] - image_name = self.annotations[idx][5][0] - # 0,1,2,3 correspond to bounding boxes - - image_path = os.path.join(self.root, f"cars_{'train' if self.train else 'test' }", image_name) + image_file, target = self._samples[idx] + image_path = os.path.join(self.root, f"cars_{'train' if self.train else 'test'}", image_file) # Beware: Stanford cars targets starts at 1 - target_id = target_id - 1 + target_id = target - 1 pil_image = Image.open(image_path).convert("RGB") if self.transform is not None: @@ -106,7 +115,6 @@ def __getitem__(self, idx: int) -> (Image,int): pil_image = self.target_transform(pil_image) return pil_image, target_id - def download(self) -> None: if self._check_exists(): return @@ -118,11 +126,16 @@ def download(self) -> None: md5=self.md5s[self.train] ) download_and_extract_archive( - url=self.annot_urls[1], download_root=self.root, extract_root=self.root ,md5="c3b158d763b6e2245038c8ad08e45376" - ) + url=self.annot_urls[1], download_root=self.root, extract_root=self.root, + md5=self.annot_md5s[1] + ) if not self.train: download_url( - url=self.annot_urls[0], filename="cars_test_annos_withlabels.mat", root=self.root ,md5="b0a2b23655a3edd16d84508592a98d10" + url=self.annot_urls[0], filename="cars_test_annos_withlabels.mat", root=self.root, + md5=self.annot_md5s[0] ) + def _check_exists(self) -> bool: - return os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.exists(os.path.join(self.root,"devkit/cars_meta.mat")) \ No newline at end of file + return os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir( + os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.exists( + os.path.join(self.root, "devkit/cars_meta.mat")) if self.train else os.path.exists(os.path.join(self.root,"cars_test_annos_withlabels.mat")) From db410b91a43f65c0b38cd749e1e6bbd33a6ad0ce Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 8 Jan 2022 14:25:58 +0530 Subject: [PATCH 06/15] Adding Testcase for stanford cars --- test/test_datasets.py | 41 ++++++++++++++++++++++++++ torchvision/datasets/standford_cars.py | 10 +++---- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/test/test_datasets.py 
b/test/test_datasets.py index 6ba39542236..3add5516d24 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2326,6 +2326,35 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, int) ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True,False)) + def _inject_fake_data(self, tmpdir, config): + root_folder = os.path.join(tmpdir, "stanforddcars") + + os.makedirs(root_folder, exist_ok=True) + + num_examples = 5 + if config["train"]: + #if train = True + datasets_utils.create_image_folder( + root=root_folder, + name="cars_train", + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, + ) + + + else: + # test_folder i.e train = False + datasets_utils.create_image_folder( + root_folder, + "cars_test", + lambda image_index: f"{image_index:5d}.jpg", + num_examples + ) + + file = "cars_test_annos_withlabels.mat" + + + class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): @@ -2469,6 +2498,18 @@ def _meta_to_split_and_classification_ann(self, meta, idx): breed_id = "-1" return (image_id, class_id, species, breed_id) +class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): + DATASET_CLASS = datasets.StanfordCars + REQUIRED_PACKAGES = ("scipy",) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True,False)) + FEATURE_TYPES = (PIL.Image.Image,int) + + def _inject_fake_data(self, tmpdir, config): + + import scipy.io as io + train = config["train"] + num_examples= 5 + pass if __name__ == "__main__": unittest.main() diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py index 3d13e5b07b3..94daeac8279 100644 --- a/torchvision/datasets/standford_cars.py +++ b/torchvision/datasets/standford_cars.py @@ -68,7 +68,7 @@ def __init__( raise RuntimeError("Dataset not found. 
You can use download=True to download it") self._samples = self._make_dataset() - self.classes = self._get_class_names() # class_id to class_name mapping + self.classes = self._get_classes_name() # class_id to class_name mapping def _get_class_names(self) -> dict: """ @@ -95,6 +95,8 @@ def _make_dataset(self): for index in range(len(annotations)): target = annotations[index][4][0, 0] image_file = annotations[index][5][0] + # Beware: Stanford cars targets starts at 1 + target = target - 1 samples.append((image_file, target)) return samples @@ -105,15 +107,13 @@ def __getitem__(self, idx: int) -> (Image, int): """Returns pil_image and class_id for given index""" image_file, target = self._samples[idx] image_path = os.path.join(self.root, f"cars_{'train' if self.train else 'test'}", image_file) - # Beware: Stanford cars targets starts at 1 - target_id = target - 1 pil_image = Image.open(image_path).convert("RGB") if self.transform is not None: pil_image = self.transform(pil_image) if self.target_transform is not None: - pil_image = self.target_transform(pil_image) - return pil_image, target_id + target = self.target_transform(target) + return pil_image, target def download(self) -> None: if self._check_exists(): From 0e91bf0454f35a35a58e0826d50a49e837a79d96 Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 8 Jan 2022 15:30:44 +0530 Subject: [PATCH 07/15] Added Testcase for stanford cars --- test/test_datasets.py | 42 ++++++++++++++++++- torchvision/datasets/standford_cars.py | 56 ++++++++++++++------------ 2 files changed, 70 insertions(+), 28 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 3add5516d24..98cdfadaee4 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2498,6 +2498,8 @@ def _meta_to_split_and_classification_ann(self, meta, idx): breed_id = "-1" return (image_id, class_id, species, breed_id) + + class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StanfordCars REQUIRED_PACKAGES = ("scipy",) @@ -2505,11 +2507,47 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): FEATURE_TYPES = (PIL.Image.Image,int) def _inject_fake_data(self, tmpdir, config): - import scipy.io as io + from numpy.core.records import fromarrays #to record arrays similar to matlab format train = config["train"] num_examples= 5 - pass + root_folder = os.path.join(tmpdir, "stanforddcars") + os.makedirs(root_folder, exist_ok=True) + + #generate random data for labels + class_name = np.random.randint(0, 100, num_examples, dtype=np.uint8) + bbox_x1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) + bbox_x2 = np.random.randint(0, 100, num_examples, dtype=np.uint8) + + bbox_y1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) + bb1ox_y2= np.random.randint(0, 100, num_examples, dtype=np.uint8) + fname = [f"{i:5d}.jpg" for i in range(num_examples)] + + rec_array = fromarrays([bbox_x1,bbox_y1,bbox_x2,bb1ox_y2,class_name,fname], ["bbox_x1","bbox_y1","bbox_x2","bbox_y2","class","fname"]) + + if train: + #create training image folder + datasets_utils.create_image_folder( + root=root_folder, + name="cars_train", + file_name_fn=lambda image_index : f"{image_index:5d}.jpg", + num_examples=num_examples, + ) + devkit = os.path.join(root_folder,"devkit") + io.savemat(f'{devkit}/cars_train_annos.mat', {'annotations': rec_array}) #save the recorded array as matlab file + else: + #create test image folder + datasets_utils.create_image_folder( + root=root_folder, + 
name="cars_test", + file_name_fn=lambda image_index : f"{image_index:5d}.jpg", + num_examples=num_examples + ) + io.savemat(f"{root_folder}/cars_test_annos_withlabels.mat", {"annotations":rec_array}) #save recorded array as matlab file + + return num_examples + + if __name__ == "__main__": unittest.main() diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py index 94daeac8279..b319ee702f2 100644 --- a/torchvision/datasets/standford_cars.py +++ b/torchvision/datasets/standford_cars.py @@ -1,6 +1,7 @@ import os import os.path from typing import Callable, Optional + from PIL import Image from .utils import download_and_extract_archive, download_url @@ -16,7 +17,7 @@ class StanfordCars(VisionDataset): Args: root (string): Root directory of dataset - train (bool, optional): + train (bool, optional):If True, creates dataset from training set, otherwise creates from test set transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.RandomCrop`` target_transform (callable, optional): A function/transform that takes in the @@ -28,26 +29,30 @@ class StanfordCars(VisionDataset): urls = ( "https://ai.stanford.edu/~jkrause/car196/cars_test.tgz", "https://ai.stanford.edu/~jkrause/car196/cars_train.tgz", - ) + ) # test and train image urls - md5s = ("4ce7ebf6a94d07f1952d94dd34c4d501", "065e5b463ae28d29e77c1b4b166cfe61") # md5checksum for test and train + md5s = ( + "4ce7ebf6a94d07f1952d94dd34c4d501", + "065e5b463ae28d29e77c1b4b166cfe61", + ) # md5checksum for test and train data annot_urls = ( "https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat", "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz", - ) + ) # annotations and labels for test and train + annot_md5s = ( "b0a2b23655a3edd16d84508592a98d10", "c3b158d763b6e2245038c8ad08e45376", - ) + ) # md5 checksum for annotations def __init__( - self, - root: str, - train: bool = True, - transform: Optional[Callable] = None, - target_transform: Optional[Callable] = None, - download: bool = False, + self, + root: str, + train: bool = True, + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + download: bool = False, ) -> None: try: @@ -76,10 +81,7 @@ def _get_class_names(self) -> dict: """ meta_data = self._loadmat(os.path.join(self.root, "devkit/cars_meta.mat")) class_names = meta_data["class_names"][0] - return { - class_name[0].replace(" ", "_").replace("/", "_"): i - for i, class_name in enumerate(class_names) - } + return {class_name[0].replace(" ", "_").replace("/", "_"): i for i, class_name in enumerate(class_names)} def _make_dataset(self): """ @@ -120,22 +122,24 @@ def download(self) -> None: return else: download_and_extract_archive( - url=self.urls[self.train], - download_root=self.root, - extract_root=self.root, - md5=self.md5s[self.train] + url=self.urls[self.train], download_root=self.root, extract_root=self.root, md5=self.md5s[self.train] ) download_and_extract_archive( - url=self.annot_urls[1], download_root=self.root, extract_root=self.root, - md5=self.annot_md5s[1] + url=self.annot_urls[1], download_root=self.root, extract_root=self.root, md5=self.annot_md5s[1] ) if not self.train: download_url( - url=self.annot_urls[0], filename="cars_test_annos_withlabels.mat", root=self.root, - md5=self.annot_md5s[0] + url=self.annot_urls[0], + filename="cars_test_annos_withlabels.mat", + root=self.root, + md5=self.annot_md5s[0], ) def _check_exists(self) -> bool: - return 
os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir( - os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.exists( - os.path.join(self.root, "devkit/cars_meta.mat")) if self.train else os.path.exists(os.path.join(self.root,"cars_test_annos_withlabels.mat")) + return ( + os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) + and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) + and os.path.exists(os.path.join(self.root, "devkit/cars_meta.mat")) + if self.train + else os.path.exists(os.path.join(self.root, "cars_test_annos_withlabels.mat")) + ) From fbd3122c48d8e3ceb09f9a853cb0e56b2feb068c Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 8 Jan 2022 16:11:54 +0530 Subject: [PATCH 08/15] Added Testcase for stanford cars --- test/test_datasets.py | 86 ++++++++++++++----------------------------- 1 file changed, 28 insertions(+), 58 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 98cdfadaee4..5a1085dfa01 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2320,42 +2320,6 @@ def inject_fake_data(self, tmpdir, config): return num_samples -class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): - DATASET_CLASS = datasets.StanfordCars - REQUIRED_PACKAGES = ("scipy",) - FEATURE_TYPES = (PIL.Image.Image, int) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True,False)) - - def _inject_fake_data(self, tmpdir, config): - root_folder = os.path.join(tmpdir, "stanforddcars") - - os.makedirs(root_folder, exist_ok=True) - - num_examples = 5 - if config["train"]: - #if train = True - datasets_utils.create_image_folder( - root=root_folder, - name="cars_train", - file_name_fn=lambda image_index: f"{image_index:5d}.jpg", - num_examples=num_examples, - ) - - - else: - # test_folder i.e train = False - datasets_utils.create_image_folder( - root_folder, - "cars_test", - lambda image_index: f"{image_index:5d}.jpg", - num_examples - ) - - file = "cars_test_annos_withlabels.mat" - - - - class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.GTSRB @@ -2499,55 +2463,61 @@ def _meta_to_split_and_classification_ann(self, meta, idx): return (image_id, class_id, species, breed_id) - class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StanfordCars REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True,False)) - FEATURE_TYPES = (PIL.Image.Image,int) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + FEATURE_TYPES = (PIL.Image.Image, int) def _inject_fake_data(self, tmpdir, config): import scipy.io as io - from numpy.core.records import fromarrays #to record arrays similar to matlab format + from numpy.core.records import fromarrays # to record arrays similar to matlab format + train = config["train"] - num_examples= 5 + num_examples = 5 root_folder = os.path.join(tmpdir, "stanforddcars") os.makedirs(root_folder, exist_ok=True) - #generate random data for labels + # generate random data for labels class_name = np.random.randint(0, 100, num_examples, dtype=np.uint8) bbox_x1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) bbox_x2 = np.random.randint(0, 100, num_examples, dtype=np.uint8) bbox_y1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) - bb1ox_y2= np.random.randint(0, 100, num_examples, 
dtype=np.uint8) + bb1ox_y2 = np.random.randint(0, 100, num_examples, dtype=np.uint8) fname = [f"{i:5d}.jpg" for i in range(num_examples)] - rec_array = fromarrays([bbox_x1,bbox_y1,bbox_x2,bb1ox_y2,class_name,fname], ["bbox_x1","bbox_y1","bbox_x2","bbox_y2","class","fname"]) + rec_array = fromarrays( + [bbox_x1, bbox_y1, bbox_x2, bb1ox_y2, class_name, fname], + ["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"], + ) if train: - #create training image folder + # create training image folder datasets_utils.create_image_folder( - root=root_folder, - name="cars_train", - file_name_fn=lambda image_index : f"{image_index:5d}.jpg", - num_examples=num_examples, + root=root_folder, + name="cars_train", + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, ) - devkit = os.path.join(root_folder,"devkit") - io.savemat(f'{devkit}/cars_train_annos.mat', {'annotations': rec_array}) #save the recorded array as matlab file + devkit = os.path.join(root_folder, "devkit") + io.savemat( + f"{devkit}/cars_train_annos.mat", {"annotations": rec_array} + ) # save the recorded array as matlab file else: - #create test image folder + # create test image folder datasets_utils.create_image_folder( - root=root_folder, - name="cars_test", - file_name_fn=lambda image_index : f"{image_index:5d}.jpg", - num_examples=num_examples + root=root_folder, + name="cars_test", + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, ) - io.savemat(f"{root_folder}/cars_test_annos_withlabels.mat", {"annotations":rec_array}) #save recorded array as matlab file + io.savemat( + f"{root_folder}/cars_test_annos_withlabels.mat", {"annotations": rec_array} + ) # save recorded array as matlab file return num_examples - if __name__ == "__main__": unittest.main() From 817e9f240c5bb6908bae059574f3b5347609fd2e Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Sat, 8 Jan 2022 22:24:22 +0530 Subject: [PATCH 09/15] minor edit --- test/test_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 5a1085dfa01..55eb0794e09 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2489,7 +2489,7 @@ def _inject_fake_data(self, tmpdir, config): rec_array = fromarrays( [bbox_x1, bbox_y1, bbox_x2, bb1ox_y2, class_name, fname], - ["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"], + names=["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"], ) if train: From 52c98f34fdb6b2c986893c04cefbe9b4b1f4be4d Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Wed, 12 Jan 2022 00:09:37 +0530 Subject: [PATCH 10/15] made changes as per the suggestions --- test/test_datasets.py | 4 +- torchvision/datasets/standford_cars.py | 79 ++++++++++---------------- 2 files changed, 31 insertions(+), 52 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 55eb0794e09..44d40c6e098 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2467,7 +2467,6 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StanfordCars REQUIRED_PACKAGES = ("scipy",) ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) - FEATURE_TYPES = (PIL.Image.Image, int) def _inject_fake_data(self, tmpdir, config): import scipy.io as io @@ -2475,7 +2474,7 @@ def _inject_fake_data(self, tmpdir, config): train = config["train"] num_examples = 5 - 
root_folder = os.path.join(tmpdir, "stanforddcars") + root_folder = os.path.join(tmpdir) os.makedirs(root_folder, exist_ok=True) # generate random data for labels @@ -2501,6 +2500,7 @@ def _inject_fake_data(self, tmpdir, config): num_examples=num_examples, ) devkit = os.path.join(root_folder, "devkit") + os.makedirs(devkit) io.savemat( f"{devkit}/cars_train_annos.mat", {"annotations": rec_array} ) # save the recorded array as matlab file diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/standford_cars.py index b319ee702f2..8dcd8b7c6a6 100644 --- a/torchvision/datasets/standford_cars.py +++ b/torchvision/datasets/standford_cars.py @@ -1,6 +1,6 @@ import os import os.path -from typing import Callable, Optional +from typing import Callable, Optional, Any, Tuple from PIL import Image @@ -56,9 +56,7 @@ def __init__( ) -> None: try: - from scipy.io import loadmat - - self._loadmat = loadmat + import scipy.io as sio except ImportError: raise RuntimeError("Scipy is not found. This dataset needs to have scipy installed: pip install scipy") @@ -72,43 +70,29 @@ def __init__( if not self._check_exists(): raise RuntimeError("Dataset not found. You can use download=True to download it") - self._samples = self._make_dataset() - self.classes = self._get_classes_name() # class_id to class_name mapping - - def _get_class_names(self) -> dict: - """ - Returns Mapping of class ids to class names in form of Dictionary - """ - meta_data = self._loadmat(os.path.join(self.root, "devkit/cars_meta.mat")) - class_names = meta_data["class_names"][0] - return {class_name[0].replace(" ", "_").replace("/", "_"): i for i, class_name in enumerate(class_names)} - - def _make_dataset(self): - """ - Returns Annotations for training data and testing data - """ - annotations = None - if self.train: - annotations = self._loadmat(os.path.join(self.root, "devkit/cars_train_annos.mat")) - else: - annotations = self._loadmat(os.path.join(self.root, "cars_test_annos_withlabels.mat")) - samples = [] - annotations = annotations["annotations"][0] - for index in range(len(annotations)): - target = annotations[index][4][0, 0] - image_file = annotations[index][5][0] - # Beware: Stanford cars targets starts at 1 - target = target - 1 - samples.append((image_file, target)) - return samples + self._samples = [ + ( + os.path.join(self.root, f"cars_{'train' if self.train else 'test'}", annotation["fname"]), + annotation["class"] - 1, # Beware stanford cars target mapping starts from 1 + ) + for annotation in sio.loadmat( + os.path.join( + self.root, + *["devkit", "cars_train_annos.mat"] if self.train else ["cars_test_annos_withlabels.mat"], + ), + squeeze_me=True, + )["annotations"] + ] + + class_names = sio.loadmat(os.path.join(self.root, "devkit", "cars_meta.mat"))["class_names"][0] + self.classes = {class_name[0]: i for i, class_name in enumerate(class_names)} def __len__(self) -> int: return len(self._samples) - def __getitem__(self, idx: int) -> (Image, int): + def __getitem__(self, idx: int) -> Tuple[Any, Any]: """Returns pil_image and class_id for given index""" - image_file, target = self._samples[idx] - image_path = os.path.join(self.root, f"cars_{'train' if self.train else 'test'}", image_file) + image_path, target = self._samples[idx] pil_image = Image.open(image_path).convert("RGB") if self.transform is not None: @@ -120,26 +104,21 @@ def __getitem__(self, idx: int) -> (Image, int): def download(self) -> None: if self._check_exists(): return - else: - download_and_extract_archive( - 
url=self.urls[self.train], download_root=self.root, extract_root=self.root, md5=self.md5s[self.train] - ) - download_and_extract_archive( - url=self.annot_urls[1], download_root=self.root, extract_root=self.root, md5=self.annot_md5s[1] + + download_and_extract_archive(url=self.urls[self.train], download_root=self.root, md5=self.md5s[self.train]) + download_and_extract_archive(url=self.annot_urls[1], download_root=self.root, md5=self.annot_md5s[1]) + if not self.train: + download_url( + url=self.annot_urls[0], + root=self.root, + md5=self.annot_md5s[0], ) - if not self.train: - download_url( - url=self.annot_urls[0], - filename="cars_test_annos_withlabels.mat", - root=self.root, - md5=self.annot_md5s[0], - ) def _check_exists(self) -> bool: return ( os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) - and os.path.exists(os.path.join(self.root, "devkit/cars_meta.mat")) + and os.path.exists(os.path.join(self.root, "devkit", "cars_meta.mat")) if self.train else os.path.exists(os.path.join(self.root, "cars_test_annos_withlabels.mat")) ) From 2eceade03a79f987593475e618f504c7afe5f0ba Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Wed, 12 Jan 2022 00:41:20 +0530 Subject: [PATCH 11/15] fixed typo in naming stanford_cars.py --- torchvision/datasets/__init__.py | 2 +- torchvision/datasets/{standford_cars.py => stanford_cars.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename torchvision/datasets/{standford_cars.py => stanford_cars.py} (100%) diff --git a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 9a355f7ba32..480ee22d58a 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -27,7 +27,7 @@ from .sbd import SBDataset from .sbu import SBU from .semeion import SEMEION -from .standford_cars import StanfordCars +from .stanford_cars import StanfordCars from .stl10 import STL10 from .sun397 import SUN397 from .svhn import SVHN diff --git a/torchvision/datasets/standford_cars.py b/torchvision/datasets/stanford_cars.py similarity index 100% rename from torchvision/datasets/standford_cars.py rename to torchvision/datasets/stanford_cars.py From af25d727071d0c2eb870d4013c1fccd3521e706e Mon Sep 17 00:00:00 2001 From: Abhijit Deo <72816663+abhi-glitchhg@users.noreply.github.com> Date: Mon, 17 Jan 2022 14:07:50 +0530 Subject: [PATCH 12/15] cars_meta.mat file will be created in test --- test/test_datasets.py | 26 ++++++++++++-------------- torchvision/datasets/stanford_cars.py | 9 ++++++--- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 44d40c6e098..5f3dd42a90f 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2470,14 +2470,12 @@ class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): def _inject_fake_data(self, tmpdir, config): import scipy.io as io - from numpy.core.records import fromarrays # to record arrays similar to matlab format + from numpy.core.records import fromarrays train = config["train"] num_examples = 5 - root_folder = os.path.join(tmpdir) - os.makedirs(root_folder, exist_ok=True) + root_folder = tmpdir - # generate random data for labels class_name = np.random.randint(0, 100, num_examples, dtype=np.uint8) bbox_x1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) bbox_x2 = np.random.randint(0, 100, num_examples, dtype=np.uint8) @@ -2490,31 +2488,31 @@ def 
_inject_fake_data(self, tmpdir, config): [bbox_x1, bbox_y1, bbox_x2, bb1ox_y2, class_name, fname], names=["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"], ) + devkit = os.path.join(root_folder, "devkit") + os.makedirs(devkit) + + random_class_names = ["Tesla Model S Sedan 2012"] * 196 + + io.savemat(os.path.join(devkit, "cars_meta.mat"), {"class_names": random_class_names}) if train: - # create training image folder datasets_utils.create_image_folder( root=root_folder, name="cars_train", file_name_fn=lambda image_index: f"{image_index:5d}.jpg", num_examples=num_examples, ) - devkit = os.path.join(root_folder, "devkit") - os.makedirs(devkit) - io.savemat( - f"{devkit}/cars_train_annos.mat", {"annotations": rec_array} - ) # save the recorded array as matlab file + + io.savemat(f"{devkit}/cars_train_annos.mat", {"annotations": rec_array}) else: - # create test image folder + datasets_utils.create_image_folder( root=root_folder, name="cars_test", file_name_fn=lambda image_index: f"{image_index:5d}.jpg", num_examples=num_examples, ) - io.savemat( - f"{root_folder}/cars_test_annos_withlabels.mat", {"annotations": rec_array} - ) # save recorded array as matlab file + io.savemat(f"{root_folder}/cars_test_annos_withlabels.mat", {"annotations": rec_array}) return num_examples diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py index 8dcd8b7c6a6..9af9dc3fb49 100644 --- a/torchvision/datasets/stanford_cars.py +++ b/torchvision/datasets/stanford_cars.py @@ -73,7 +73,8 @@ def __init__( self._samples = [ ( os.path.join(self.root, f"cars_{'train' if self.train else 'test'}", annotation["fname"]), - annotation["class"] - 1, # Beware stanford cars target mapping starts from 1 + annotation["class"] - 1, + # Beware stanford cars target mapping starts from 1 ) for annotation in sio.loadmat( os.path.join( @@ -84,8 +85,10 @@ def __init__( )["annotations"] ] - class_names = sio.loadmat(os.path.join(self.root, "devkit", "cars_meta.mat"))["class_names"][0] - self.classes = {class_name[0]: i for i, class_name in enumerate(class_names)} + self.classes = sio.loadmat(os.path.join(self.root, "devkit", "cars_meta.mat"), squeeze_me=True)[ + "class_names" + ].tolist() + self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)} def __len__(self) -> int: return len(self._samples) From 8fceb0b661f2988ee2c39f30834368d1b72fd98b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 17 Jan 2022 17:57:56 +0000 Subject: [PATCH 13/15] Some cleanups --- test/test_datasets.py | 64 +++++++---------- torchvision/datasets/stanford_cars.py | 100 ++++++++++++-------------- 2 files changed, 74 insertions(+), 90 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index dad536d48e1..8a5dba326ff 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2517,53 +2517,43 @@ def _meta_to_split_and_classification_ann(self, meta, idx): class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.StanfordCars REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) - def _inject_fake_data(self, tmpdir, config): + def inject_fake_data(self, tmpdir, config): import scipy.io as io from numpy.core.records import fromarrays - train = config["train"] - num_examples = 5 - root_folder = tmpdir + num_examples = {"train": 5, "test": 7}[config["split"]] + num_classes = 3 + base_folder = pathlib.Path(tmpdir) / 
"stanford_cars" - class_name = np.random.randint(0, 100, num_examples, dtype=np.uint8) - bbox_x1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) - bbox_x2 = np.random.randint(0, 100, num_examples, dtype=np.uint8) + devkit = base_folder / "devkit" + devkit.mkdir(parents=True) - bbox_y1 = np.random.randint(0, 100, num_examples, dtype=np.uint8) - bb1ox_y2 = np.random.randint(0, 100, num_examples, dtype=np.uint8) - fname = [f"{i:5d}.jpg" for i in range(num_examples)] + if config["split"] == "train": + images_folder_name = "cars_train" + annotations_mat_path = str(devkit / "cars_train_annos.mat") + else: + images_folder_name = "cars_test" + annotations_mat_path = str(base_folder / "cars_test_annos_withlabels.mat") - rec_array = fromarrays( - [bbox_x1, bbox_y1, bbox_x2, bb1ox_y2, class_name, fname], - names=["bbox_x1", "bbox_y1", "bbox_x2", "bbox_y2", "class", "fname"], + datasets_utils.create_image_folder( + root=base_folder, + name=images_folder_name, + file_name_fn=lambda image_index: f"{image_index:5d}.jpg", + num_examples=num_examples, ) - devkit = os.path.join(root_folder, "devkit") - os.makedirs(devkit) - - random_class_names = ["Tesla Model S Sedan 2012"] * 196 - - io.savemat(os.path.join(devkit, "cars_meta.mat"), {"class_names": random_class_names}) - if train: - datasets_utils.create_image_folder( - root=root_folder, - name="cars_train", - file_name_fn=lambda image_index: f"{image_index:5d}.jpg", - num_examples=num_examples, - ) - - io.savemat(f"{devkit}/cars_train_annos.mat", {"annotations": rec_array}) - else: + classes = np.random.randint(1, num_classes + 1, num_examples, dtype=np.uint8) + fnames = [f"{i:5d}.jpg" for i in range(num_examples)] + rec_array = fromarrays( + [classes, fnames], + names=["class", "fname"], + ) + io.savemat(annotations_mat_path, {"annotations": rec_array}) - datasets_utils.create_image_folder( - root=root_folder, - name="cars_test", - file_name_fn=lambda image_index: f"{image_index:5d}.jpg", - num_examples=num_examples, - ) - io.savemat(f"{root_folder}/cars_test_annos_withlabels.mat", {"annotations": rec_array}) + random_class_names = ["random_name"] * num_classes + io.savemat(str(devkit / "cars_meta.mat"), {"class_names": random_class_names}) return num_examples diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py index 9af9dc3fb49..60f0c0df2d1 100644 --- a/torchvision/datasets/stanford_cars.py +++ b/torchvision/datasets/stanford_cars.py @@ -1,23 +1,26 @@ -import os -import os.path +import pathlib from typing import Callable, Optional, Any, Tuple from PIL import Image -from .utils import download_and_extract_archive, download_url +from .utils import download_and_extract_archive, download_url, verify_str_arg from .vision import VisionDataset class StanfordCars(VisionDataset): """`Stanford Cars `_ Dataset - .. warning:: + The Cars dataset contains 16,185 images of 196 classes of cars. The data is + split into 8,144 training images and 8,041 testing images, where each class + has been split roughly in a 50-50 split + + .. note:: This class needs `scipy `_ to load target files from `.mat` format. Args: root (string): Root directory of dataset - train (bool, optional):If True, creates dataset from training set, otherwise creates from test set + split (string, optional): The dataset split, supports ``"train"`` (default) or ``"test"``. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. 
E.g, ``transforms.RandomCrop`` target_transform (callable, optional): A function/transform that takes in the @@ -26,30 +29,10 @@ class StanfordCars(VisionDataset): puts it in root directory. If dataset is already downloaded, it is not downloaded again.""" - urls = ( - "https://ai.stanford.edu/~jkrause/car196/cars_test.tgz", - "https://ai.stanford.edu/~jkrause/car196/cars_train.tgz", - ) # test and train image urls - - md5s = ( - "4ce7ebf6a94d07f1952d94dd34c4d501", - "065e5b463ae28d29e77c1b4b166cfe61", - ) # md5checksum for test and train data - - annot_urls = ( - "https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat", - "https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz", - ) # annotations and labels for test and train - - annot_md5s = ( - "b0a2b23655a3edd16d84508592a98d10", - "c3b158d763b6e2245038c8ad08e45376", - ) # md5 checksum for annotations - def __init__( self, root: str, - train: bool = True, + split: str = "train", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, @@ -62,7 +45,16 @@ def __init__( super().__init__(root, transform=transform, target_transform=target_transform) - self.train = train + self._split = verify_str_arg(split, "split", ("train", "test")) + self._base_folder = pathlib.Path(root) / "stanford_cars" + devkit = self._base_folder / "devkit" + + if self._split == "train": + self._annotations_mat_path = devkit / "cars_train_annos.mat" + self._images_base_path = self._base_folder / "cars_train" + else: + self._annotations_mat_path = self._base_folder / "cars_test_annos_withlabels.mat" + self._images_base_path = self._base_folder / "cars_test" if download: self.download() @@ -72,22 +64,13 @@ def __init__( self._samples = [ ( - os.path.join(self.root, f"cars_{'train' if self.train else 'test'}", annotation["fname"]), - annotation["class"] - 1, - # Beware stanford cars target mapping starts from 1 + str(self._images_base_path / annotation["fname"]), + annotation["class"] - 1, # Original target mapping starts from 1, hence -1 ) - for annotation in sio.loadmat( - os.path.join( - self.root, - *["devkit", "cars_train_annos.mat"] if self.train else ["cars_test_annos_withlabels.mat"], - ), - squeeze_me=True, - )["annotations"] + for annotation in sio.loadmat(self._annotations_mat_path, squeeze_me=True)["annotations"] ] - self.classes = sio.loadmat(os.path.join(self.root, "devkit", "cars_meta.mat"), squeeze_me=True)[ - "class_names" - ].tolist() + self.classes = sio.loadmat(str(devkit / "cars_meta.mat"), squeeze_me=True)["class_names"].tolist() self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)} def __len__(self) -> int: @@ -108,20 +91,31 @@ def download(self) -> None: if self._check_exists(): return - download_and_extract_archive(url=self.urls[self.train], download_root=self.root, md5=self.md5s[self.train]) - download_and_extract_archive(url=self.annot_urls[1], download_root=self.root, md5=self.annot_md5s[1]) - if not self.train: + download_and_extract_archive( + url="https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz", + download_root=self._base_folder, + md5="c3b158d763b6e2245038c8ad08e45376", + ) + if self._split == "train": + download_and_extract_archive( + url="https://ai.stanford.edu/~jkrause/car196/cars_train.tgz", + download_root=self._base_folder, + md5="065e5b463ae28d29e77c1b4b166cfe61", + ) + else: + download_and_extract_archive( + url="https://ai.stanford.edu/~jkrause/car196/cars_test.tgz", + download_root=self._base_folder, + md5="4ce7ebf6a94d07f1952d94dd34c4d501", + ) 
download_url( - url=self.annot_urls[0], - root=self.root, - md5=self.annot_md5s[0], + url="https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat", + root=self._base_folder, + md5="b0a2b23655a3edd16d84508592a98d10", ) def _check_exists(self) -> bool: - return ( - os.path.exists(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) - and os.path.isdir(os.path.join(self.root, f"cars_{'train' if self.train else 'test'}")) - and os.path.exists(os.path.join(self.root, "devkit", "cars_meta.mat")) - if self.train - else os.path.exists(os.path.join(self.root, "cars_test_annos_withlabels.mat")) - ) + if not (self._base_folder / "devkit").is_dir(): + return False + + return self._annotations_mat_path.exists() and self._images_base_path.is_dir() From dc463d767b850eb4f3a00f8faf4b4399cd2bbd3c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 17 Jan 2022 18:19:13 +0000 Subject: [PATCH 14/15] Sigh --- torchvision/datasets/stanford_cars.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/torchvision/datasets/stanford_cars.py b/torchvision/datasets/stanford_cars.py index 60f0c0df2d1..daca0b0b46a 100644 --- a/torchvision/datasets/stanford_cars.py +++ b/torchvision/datasets/stanford_cars.py @@ -93,24 +93,24 @@ def download(self) -> None: download_and_extract_archive( url="https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz", - download_root=self._base_folder, + download_root=str(self._base_folder), md5="c3b158d763b6e2245038c8ad08e45376", ) if self._split == "train": download_and_extract_archive( url="https://ai.stanford.edu/~jkrause/car196/cars_train.tgz", - download_root=self._base_folder, + download_root=str(self._base_folder), md5="065e5b463ae28d29e77c1b4b166cfe61", ) else: download_and_extract_archive( url="https://ai.stanford.edu/~jkrause/car196/cars_test.tgz", - download_root=self._base_folder, + download_root=str(self._base_folder), md5="4ce7ebf6a94d07f1952d94dd34c4d501", ) download_url( url="https://ai.stanford.edu/~jkrause/car196/cars_test_annos_withlabels.mat", - root=self._base_folder, + root=str(self._base_folder), md5="b0a2b23655a3edd16d84508592a98d10", ) From b593760a0eece516fb10b58eec7b0740b4e605b8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 18 Jan 2022 09:54:52 +0000 Subject: [PATCH 15/15] don't convert to strings --- test/test_datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index a0ba37d49e8..b7e3d8e55fb 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -2532,10 +2532,10 @@ def inject_fake_data(self, tmpdir, config): if config["split"] == "train": images_folder_name = "cars_train" - annotations_mat_path = str(devkit / "cars_train_annos.mat") + annotations_mat_path = devkit / "cars_train_annos.mat" else: images_folder_name = "cars_test" - annotations_mat_path = str(base_folder / "cars_test_annos_withlabels.mat") + annotations_mat_path = base_folder / "cars_test_annos_withlabels.mat" datasets_utils.create_image_folder( root=base_folder, @@ -2553,7 +2553,7 @@ def inject_fake_data(self, tmpdir, config): io.savemat(annotations_mat_path, {"annotations": rec_array}) random_class_names = ["random_name"] * num_classes - io.savemat(str(devkit / "cars_meta.mat"), {"class_names": random_class_names}) + io.savemat(devkit / "cars_meta.mat", {"class_names": random_class_names}) return num_examples
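
Taken together, patches 01-15 land the dataset as `torchvision.datasets.StanfordCars`, with the `train` flag replaced by a `split` argument from patch 13 onward, files placed under `root/stanford_cars/`, 0-based class targets, and a `classes` list plus `class_to_idx` mapping. The snippet below is a minimal usage sketch of that final API, not part of the patches themselves: the `root="data"` path and the `ToTensor` transform are illustrative choices, scipy must be installed to read the `.mat` annotation files, and `download=True` assumes the Stanford AI download URLs are still reachable.

    # Minimal usage sketch for the dataset added in this patch series (assumptions noted above).
    from torchvision import datasets, transforms

    train_set = datasets.StanfordCars(
        root="data",                    # images and devkit end up under data/stanford_cars/
        split="train",                  # or "test"
        transform=transforms.ToTensor(),
        download=True,                  # fetches the archives only if they are not already present
    )

    image, target = train_set[0]        # tensor (PIL image before ToTensor) and 0-based class id
    print(len(train_set), train_set.classes[target])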