Create posterize transformation and refactor common methods to assist reuse. (#3108)

datumbox · web-flow · commit 4b800b922433 · 2020-12-03T17:47:31.000Z
diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py
@@ -289,13 +289,14 @@ def test_pad(self):
 
                     self._test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **kwargs)
 
-    def _test_adjust_fn(self, fn, fn_pil, fn_t, configs, tol=2.0 + 1e-10, agg_method="max"):
+    def _test_adjust_fn(self, fn, fn_pil, fn_t, configs, tol=2.0 + 1e-10, agg_method="max",
+                        dts=(None, torch.float32, torch.float64)):
         script_fn = torch.jit.script(fn)
         torch.manual_seed(15)
         tensor, pil_img = self._create_data(26, 34, device=self.device)
         batch_tensors = self._create_data_batch(16, 18, num_samples=4, device=self.device)
 
-        for dt in [None, torch.float32, torch.float64]:
+        for dt in dts:
 
             if dt is not None:
                 tensor = F.convert_image_dtype(tensor, dt)
@@ -872,6 +873,17 @@ def test_invert(self):
             agg_method="max"
         )
 
+    def test_posterize(self):
+        self._test_adjust_fn(
+            F.posterize,
+            F_pil.posterize,
+            F_t.posterize,
+            [{"bits": bits} for bits in range(0, 8)],  # bits 0..7 (NOTE(review): valid bits=8 is not exercised)
+            tol=1.0,
+            agg_method="max",
+            dts=(None,)  # no float dtype conversion: the tensor posterize only accepts torch.uint8
+        )
+
 
 @unittest.skipIf(not torch.cuda.is_available(), reason="Skip if no CUDA device")
 class CUDATester(Tester):
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -1761,17 +1761,16 @@ def _test_randomness(self, fn, trans, configs):
                 num_samples = 250
                 counts = 0
                 for _ in range(num_samples):
-                    out = trans(p=p, **config)(img)
+                    tranformation = trans(p=p, **config)
+                    tranformation.__repr__()
+                    out = tranformation(img)
                     if out == inv_img:
                         counts += 1
 
                 p_value = stats.binom_test(counts, num_samples, p=p)
                 random.setstate(random_state)
                 self.assertGreater(p_value, 0.0001)
 
-        # Checking if it can be printed as string
-        trans().__repr__()
-
     @unittest.skipIf(stats is None, 'scipy.stats not available')
     def test_random_invert(self):
         self._test_randomness(
@@ -1780,6 +1779,14 @@ def test_random_invert(self):
             [{}]
         )
 
+    @unittest.skipIf(stats is None, 'scipy.stats not available')  # _test_randomness calls stats.binom_test
+    def test_random_posterize(self):
+        self._test_randomness(
+            F.posterize,
+            transforms.RandomPosterize,
+            [{"bits": 4}]  # each config is expanded into the transform constructor as trans(p=p, **config)
+        )
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
@@ -92,6 +92,12 @@ def test_random_vertical_flip(self):
     def test_random_invert(self):
         self._test_op('invert', 'RandomInvert')
 
+    def test_random_posterize(self):
+        fn_kwargs = meth_kwargs = {"bits": 4}  # both names refer to the same dict object
+        self._test_op(  # NOTE(review): presumably compares F.posterize with RandomPosterize; _test_op not shown
+            'posterize', 'RandomPosterize', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+
     def test_color_jitter(self):
 
         tol = 1.0 + 1e-10
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
@@ -1196,3 +1196,24 @@ def invert(img: Tensor) -> Tensor:
         return F_pil.invert(img)
 
     return F_t.invert(img)
+
+
+def posterize(img: Tensor, bits: int) -> Tensor:
+    """Posterize a PIL Image or torch Tensor by reducing the number of bits for each color channel.
+
+    Args:
+        img (PIL Image or Tensor): Image to be posterized.
+            If img is a Tensor, it is expected to be in [..., H, W] format,
+            where ... means it can have an arbitrary number of leading
+            dimensions.
+        bits (int): The number of bits to keep for each channel (0-8).
+    Returns:
+        PIL Image or Tensor: Posterized image.
+    """
+    if not (0 <= bits <= 8):
+        raise ValueError('The number of bits should be between 0 and 8. Got {}'.format(bits))
+
+    if not isinstance(img, torch.Tensor):
+        return F_pil.posterize(img, bits)
+
+    return F_t.posterize(img, bits)
diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py
@@ -613,3 +613,10 @@ def invert(img):
     if not _is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
     return ImageOps.invert(img)
+
+
+@torch.jit.unused
+def posterize(img, bits):
+    if not _is_pil_image(img):  # this backend handles PIL Images only
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+    return ImageOps.posterize(img, bits)  # the bit reduction itself is done by Pillow's ImageOps
diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
@@ -1193,3 +1193,17 @@ def invert(img: Tensor) -> Tensor:
     bound = 1.0 if img.is_floating_point() else 255.0
     dtype = img.dtype if torch.is_floating_point(img) else torch.float32
     return (bound - img.to(dtype)).to(img.dtype)
+
+
+def posterize(img: Tensor, bits: int) -> Tensor:  # keeps the `bits` most significant bits of each value
+    if not _is_tensor_a_torch_image(img):
+        raise TypeError('tensor is not a torch image.')
+
+    if img.ndim < 3:
+        raise TypeError("Input image tensor should have at least 3 dimensions, but found {}".format(img.ndim))
+    if img.dtype != torch.uint8:  # the bit mask built below is written for 8-bit values
+        raise TypeError("Only torch.uint8 image tensors are supported, but found {}".format(img.dtype))
+
+    _assert_channels(img, [1, 3])
+    mask = -int(2**(8 - bits))  # JIT-friendly for: ~(2 ** (8 - bits) - 1)
+    return img & mask  # mask has its (8 - bits) lowest bits cleared, so this zeroes them in every value
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
@@ -21,7 +21,7 @@
            "CenterCrop", "Pad", "Lambda", "RandomApply", "RandomChoice", "RandomOrder", "RandomCrop",
            "RandomHorizontalFlip", "RandomVerticalFlip", "RandomResizedCrop", "RandomSizedCrop", "FiveCrop", "TenCrop",
            "LinearTransformation", "ColorJitter", "RandomRotation", "RandomAffine", "Grayscale", "RandomGrayscale",
-           "RandomPerspective", "RandomErasing", "GaussianBlur", "InterpolationMode", "RandomInvert"]
+           "RandomPerspective", "RandomErasing", "GaussianBlur", "InterpolationMode", "RandomInvert", "RandomPosterize"]
 
 
 class Compose:
@@ -1717,10 +1717,10 @@ def __init__(self, p=0.5):
 
     @staticmethod
     def get_params() -> float:
-        """Choose value for random color inversion.
+        """Choose a value for the random transformation.
 
         Returns:
-            float: Random value which is used to determine whether the random color inversion
+            float: Random value which is used to determine whether the random transformation
             should occur.
         """
         return torch.rand(1).item()
@@ -1739,3 +1739,45 @@ def forward(self, img):
 
     def __repr__(self):
         return self.__class__.__name__ + '(p={})'.format(self.p)
+
+
+class RandomPosterize(torch.nn.Module):
+    """Posterize the image randomly with a given probability by reducing the
+    number of bits for each color channel. The image can be a PIL Image or a torch
+    Tensor, in which case it is expected to have [..., H, W] shape, where ... means
+    an arbitrary number of leading dimensions.
+
+    Args:
+        bits (int): number of bits to keep for each channel (0-8)
+        p (float): probability of the image being posterized. Default value is 0.5
+    """
+
+    def __init__(self, bits, p=0.5):
+        super().__init__()
+        self.bits = bits  # passed unchanged to F.posterize in forward()
+        self.p = p  # compared against the random draw in forward()
+
+    @staticmethod
+    def get_params() -> float:
+        """Choose a value for the random transformation.
+
+        Returns:
+            float: Random value which is used to determine whether the random transformation
+            should occur.
+        """
+        return torch.rand(1).item()
+
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be posterized.
+
+        Returns:
+            PIL Image or Tensor: Randomly posterized image.
+        """
+        if self.get_params() < self.p:  # posterize only when the random draw falls below p
+            return F.posterize(img, self.bits)
+        return img  # otherwise the input is returned as-is
+
+    def __repr__(self):
+        return self.__class__.__name__ + '(bits={},p={})'.format(self.bits, self.p)