
Commit 315f1a2

prabhat00155 authored and facebook-github-bot committed
[fbsync] Add Kitti and Sintel datasets for optical flow (#4845)
Reviewed By: kazhang
Differential Revision: D32216685
fbshipit-source-id: ec74c2a573eace36bd4a0cf9913ea1dc77fcf260
1 parent bddc464 commit 315f1a2

File tree

5 files changed: 373 additions, 1 deletion

docs/source/datasets.rst

Lines changed: 2 additions & 0 deletions
@@ -48,6 +48,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     INaturalist
     Kinetics400
     Kitti
+    KittiFlow
     KMNIST
     LFWPeople
     LFWPairs
@@ -60,6 +61,7 @@ You can also create your own datasets using the provided :ref:`base classes <bas
     SBDataset
     SBU
     SEMEION
+    Sintel
     STL10
     SVHN
     UCF101
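
For orientation, the two new entries are the optical flow datasets added by this commit. A minimal usage sketch, based on the test cases later in this diff; the exact constructor signatures live in torchvision/datasets/_optical_flow.py, which is not shown on this page, so the root/split/pass_name arguments below are assumptions modeled on the other torchvision datasets:

from torchvision import datasets

# Sketch based on the tests below: Sintel yields (img1, img2, flow),
# where flow is None for the "test" split; the accepted values are
# split in ("train", "test") and pass_name in ("clean", "final").
sintel = datasets.Sintel(root="data", split="train", pass_name="clean")
img1, img2, flow = sintel[0]

# KittiFlow yields (img1, img2, flow, valid); flow and the validity
# mask are None for the "test" split.
kitti = datasets.KittiFlow(root="data", split="train")
img1, img2, flow, valid = kitti[0]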

test/datasets_utils.py

Lines changed: 7 additions & 1 deletion
@@ -203,6 +203,7 @@ class DatasetTestCase(unittest.TestCase):
         ``transforms``, or ``download``.
     - REQUIRED_PACKAGES (Iterable[str]): Additional dependencies to use the dataset. If these packages are not
         available, the tests are skipped.
+    - EXTRA_PATCHES (set): Additional patches to add for each test, e.g. to mock a specific function
 
     Additionally, you need to overwrite the ``inject_fake_data()`` method that provides the data that the tests rely on.
     The fake data should resemble the original data as close as necessary, while containing only few examples. During
@@ -254,6 +255,8 @@ def test_baz(self):
     ADDITIONAL_CONFIGS = None
     REQUIRED_PACKAGES = None
 
+    EXTRA_PATCHES = None
+
     # These keyword arguments are checked by test_transforms in case they are available in DATASET_CLASS.
     _TRANSFORM_KWARGS = {
         "transform",
@@ -379,14 +382,17 @@ def create_dataset(
         if patch_checks:
             patchers.update(self._patch_checks())
 
+        if self.EXTRA_PATCHES is not None:
+            patchers.update(self.EXTRA_PATCHES)
+
         with get_tmp_dir() as tmpdir:
             args = self.dataset_args(tmpdir, complete_config)
             info = self._inject_fake_data(tmpdir, complete_config) if inject_fake_data else None
 
             with self._maybe_apply_patches(patchers), disable_console_output():
                 dataset = self.DATASET_CLASS(*args, **complete_config, **special_kwargs)
 
-            yield dataset, info
+                yield dataset, info
 
     @classmethod
     def setUpClass(cls):
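
Two things are worth noting in this hunk: EXTRA_PATCHES entries are plain unittest.mock.patch objects that get merged into the patcher set, and the yield is moved inside the with block so those patches stay active while the test body consumes the dataset. A minimal subclass sketch of the mechanism (MyDataset and _expensive_io are hypothetical names, not part of torchvision):

import unittest.mock

import datasets_utils
from torchvision import datasets


class MyDatasetTestCase(datasets_utils.ImageDatasetTestCase):
    DATASET_CLASS = datasets.MyDataset  # hypothetical dataset class
    # Each entry is a unittest.mock.patch object; create_dataset() adds it to
    # its patcher set and keeps it active while the test uses the dataset.
    EXTRA_PATCHES = {
        unittest.mock.patch(
            "torchvision.datasets.MyDataset._expensive_io", return_value=None
        ),
    }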

test/test_datasets.py

Lines changed: 127 additions & 0 deletions
@@ -1871,5 +1871,132 @@ def _inject_pairs(self, root, num_pairs, same):
         datasets_utils.create_image_folder(root, name2, lambda _: f"{name2}_{no2:04d}.jpg", 1, 250)
 
 
+class SintelTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.Sintel
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"), pass_name=("clean", "final"))
+    # We patch the flow reader, because this would otherwise force us to generate fake (but readable) .flo files,
+    # which is something we want to avoid.
+    _FAKE_FLOW = "Fake Flow"
+    EXTRA_PATCHES = {unittest.mock.patch("torchvision.datasets.Sintel._read_flow", return_value=_FAKE_FLOW)}
+    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (type(_FAKE_FLOW), type(None)))
+
+    def inject_fake_data(self, tmpdir, config):
+        root = pathlib.Path(tmpdir) / "Sintel"
+
+        num_images_per_scene = 3 if config["split"] == "train" else 4
+        num_scenes = 2
+
+        for split_dir in ("training", "test"):
+            for pass_name in ("clean", "final"):
+                image_root = root / split_dir / pass_name
+
+                for scene_id in range(num_scenes):
+                    scene_dir = image_root / f"scene_{scene_id}"
+                    datasets_utils.create_image_folder(
+                        image_root,
+                        name=str(scene_dir),
+                        file_name_fn=lambda image_idx: f"frame_000{image_idx}.png",
+                        num_examples=num_images_per_scene,
+                    )
+
+        # For the ground truth flow value we just create empty files so that they're properly discovered,
+        # see comment above about EXTRA_PATCHES
+        flow_root = root / "training" / "flow"
+        for scene_id in range(num_scenes):
+            scene_dir = flow_root / f"scene_{scene_id}"
+            os.makedirs(scene_dir)
+            for i in range(num_images_per_scene - 1):
+                open(str(scene_dir / f"frame_000{i}.flo"), "a").close()
+
+        # With e.g. num_images_per_scene = 3, a single scene has 3 images:
+        # frame_0000, frame_0001 and frame_0002.
+        # They will be consecutively paired as (frame_0000, frame_0001), (frame_0001, frame_0002),
+        # that is 3 - 1 = 2 examples. Hence the formula below.
+        num_examples = (num_images_per_scene - 1) * num_scenes
+        return num_examples
+
+    def test_flow(self):
+        # Make sure flow exists for train split, and make sure there are as many flow values as (pairs of) images
+        with self.create_dataset(split="train") as (dataset, _):
+            assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list)
+            for _, _, flow in dataset:
+                assert flow == self._FAKE_FLOW
+
+        # Make sure flow is always None for test split
+        with self.create_dataset(split="test") as (dataset, _):
+            assert dataset._image_list and not dataset._flow_list
+            for _, _, flow in dataset:
+                assert flow is None
+
+    def test_bad_input(self):
+        with pytest.raises(ValueError, match="split must be either"):
+            with self.create_dataset(split="bad"):
+                pass
+
+        with pytest.raises(ValueError, match="pass_name must be either"):
+            with self.create_dataset(pass_name="bad"):
+                pass
+
+
+class KittiFlowTestCase(datasets_utils.ImageDatasetTestCase):
+    DATASET_CLASS = datasets.KittiFlow
+    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"))
+    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None)))
+
+    def inject_fake_data(self, tmpdir, config):
+        root = pathlib.Path(tmpdir) / "Kitti"
+
+        num_examples = 2 if config["split"] == "train" else 3
+        for split_dir in ("training", "testing"):
+
+            datasets_utils.create_image_folder(
+                root / split_dir,
+                name="image_2",
+                file_name_fn=lambda image_idx: f"{image_idx}_10.png",
+                num_examples=num_examples,
+            )
+            datasets_utils.create_image_folder(
+                root / split_dir,
+                name="image_2",
+                file_name_fn=lambda image_idx: f"{image_idx}_11.png",
+                num_examples=num_examples,
+            )
+
+        # For Kitti the ground truth flows are encoded as 16-bit pngs.
+        # create_image_folder() will actually create 8-bit pngs, but it doesn't
+        # matter much: the flow reader will still be able to read the files, it
+        # will just be garbage flow values - but we don't care about that here.
+        datasets_utils.create_image_folder(
+            root / "training",
+            name="flow_occ",
+            file_name_fn=lambda image_idx: f"{image_idx}_10.png",
+            num_examples=num_examples,
+        )
+
+        return num_examples
+
+    def test_flow_and_valid(self):
+        # Make sure flow exists for train split, and make sure there are as many flow values as (pairs of) images.
+        # Also assert flow and valid are of the expected shape.
+        with self.create_dataset(split="train") as (dataset, _):
+            assert dataset._flow_list and len(dataset._flow_list) == len(dataset._image_list)
+            for _, _, flow, valid in dataset:
+                two, h, w = flow.shape
+                assert two == 2
+                assert valid.shape == (h, w)
+
+        # Make sure flow and valid are always None for test split
+        with self.create_dataset(split="test") as (dataset, _):
+            assert dataset._image_list and not dataset._flow_list
+            for _, _, flow, valid in dataset:
+                assert flow is None
+                assert valid is None
+
+    def test_bad_input(self):
+        with pytest.raises(ValueError, match="split must be either"):
+            with self.create_dataset(split="bad"):
+                pass
+
+
 if __name__ == "__main__":
     unittest.main()
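
The example-count formula in SintelTestCase.inject_fake_data follows from consecutive pairing; a standalone illustration of that arithmetic (the names here are illustrative only, not dataset code):

# Frames within one scene are paired consecutively, so N images yield N - 1 pairs.
frames = [f"frame_{i:04d}" for i in range(3)]
pairs = list(zip(frames, frames[1:]))
assert pairs == [("frame_0000", "frame_0001"), ("frame_0001", "frame_0002")]
assert len(pairs) == len(frames) - 1  # hence (num_images_per_scene - 1) per scene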

torchvision/datasets/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -1,3 +1,4 @@
+from ._optical_flow import KittiFlow, Sintel
 from .caltech import Caltech101, Caltech256
 from .celeba import CelebA
 from .cifar import CIFAR10, CIFAR100
@@ -71,4 +72,6 @@
     "INaturalist",
     "LFWPeople",
     "LFWPairs",
+    "KittiFlow",
+    "Sintel",
 )
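
With this re-export in place, both classes are reachable from the public torchvision.datasets namespace. A quick check, assuming the tuple being extended in the second hunk is the module's __all__ (its name is not visible in this diff) and a torchvision build that includes this commit:

import torchvision.datasets as datasets

# Both names should now be part of the public API surface.
assert "KittiFlow" in datasets.__all__ and "Sintel" in datasets.__all__
print(datasets.KittiFlow, datasets.Sintel)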
