From d3a7727836ecc4ba54cc52ee6e53b47f03b95ab5 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 5 Nov 2020 09:04:38 +0000
Subject: [PATCH] Housekeeping improvements:

- fixed error computation between tensor and PIL results (aggregate the
  absolute difference)
- refactored tensor cast for resize into the _cast_squeeze_in /
  _cast_squeeze_out helpers
- fixed round usage (round only when casting back to an integer dtype)
---
(Illustrative usage sketches for the three items above are appended after
the diff.)

 test/common_utils.py                        |  3 +-
 test/test_functional_tensor.py              |  4 +--
 test/test_transforms_tensor.py              |  4 +--
 torchvision/transforms/functional_tensor.py | 36 ++++++++-------------
 4 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/test/common_utils.py b/test/common_utils.py
index 5368018e971..eab6910baf3 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -361,7 +361,8 @@ def approxEqualTensorToPIL(self, tensor, pil_image, tol=1e-5, msg=None, agg_meth
         if np_pil_image.ndim == 2:
             np_pil_image = np_pil_image[:, :, None]
         pil_tensor = torch.as_tensor(np_pil_image.transpose((2, 0, 1))).to(tensor)
-        err = getattr(torch, agg_method)(tensor - pil_tensor).item()
+        # error value can be mean absolute error or max absolute error
+        err = getattr(torch, agg_method)(torch.abs(tensor - pil_tensor)).item()
         self.assertTrue(
             err < tol,
             msg="{}: err={}, tol={}: \n{}\nvs\n{}".format(msg, err, tol, tensor[0, :10, :10], pil_tensor[0, :10, :10])
diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py
index e91e9321107..50f23c6f686 100644
--- a/test/test_functional_tensor.py
+++ b/test/test_functional_tensor.py
@@ -352,8 +352,8 @@ def test_adjust_hue(self):
             F_pil.adjust_hue,
             F_t.adjust_hue,
             [{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]],
-            tol=0.1,
-            agg_method="mean"
+            tol=16.1,
+            agg_method="max"
         )
 
     def test_adjust_gamma(self):
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 1fc0ab61ec4..aafd862d351 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -111,13 +111,13 @@ def test_color_jitter(self):
         for f in [0.2, 0.5, (-0.2, 0.3), [-0.4, 0.5]]:
             meth_kwargs = {"hue": f}
             self._test_class_op(
-                "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=0.1, agg_method="mean"
+                "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=16.1, agg_method="max"
             )
 
         # All 4 parameters together
         meth_kwargs = {"brightness": 0.2, "contrast": 0.2, "saturation": 0.2, "hue": 0.2}
         self._test_class_op(
-            "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=0.1, agg_method="mean"
+            "ColorJitter", meth_kwargs=meth_kwargs, test_exact_match=False, tol=12.1, agg_method="max"
         )
 
     def test_pad(self):
diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
index d27e6066fe3..42a686bb726 100644
--- a/torchvision/transforms/functional_tensor.py
+++ b/torchvision/transforms/functional_tensor.py
@@ -822,32 +822,19 @@ def resize(img: Tensor, size: List[int], interpolation: int = 2) -> Tensor:
     if (w <= h and w == size_w) or (h <= w and h == size_h):
         return img
 
-    # make image NCHW
-    need_squeeze = False
-    if img.ndim < 4:
-        img = img.unsqueeze(dim=0)
-        need_squeeze = True
-
     mode = _interpolation_modes[interpolation]
 
-    out_dtype = img.dtype
-    need_cast = False
-    if img.dtype not in (torch.float32, torch.float64):
-        need_cast = True
-        img = img.to(torch.float32)
+    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [torch.float32, torch.float64])
 
     # Define align_corners to avoid warnings
     align_corners = False if mode in ["bilinear", "bicubic"] else None
 
     img = interpolate(img, size=[size_h, size_w], mode=mode, align_corners=align_corners)
 
-    if need_squeeze:
-        img = img.squeeze(dim=0)
+    if mode == "bicubic" and out_dtype == torch.uint8:
+        img = img.clamp(min=0, max=255)
 
-    if need_cast:
-        if mode == "bicubic":
-            img = img.clamp(min=0, max=255)
-        img = img.to(out_dtype)
+    img = _cast_squeeze_out(img, need_cast=need_cast, need_squeeze=need_squeeze, out_dtype=out_dtype)
 
     return img
 
@@ -879,7 +866,7 @@ def _assert_grid_transform_inputs(
         raise ValueError("Resampling mode '{}' is unsupported with Tensor input".format(resample))
 
 
-def _cast_squeeze_in(img: Tensor, req_dtype: torch.dtype) -> Tuple[Tensor, bool, bool, torch.dtype]:
+def _cast_squeeze_in(img: Tensor, req_dtypes: List[torch.dtype]) -> Tuple[Tensor, bool, bool, torch.dtype]:
     need_squeeze = False
     # make image NCHW
     if img.ndim < 4:
@@ -888,8 +875,9 @@ def _cast_squeeze_in(img: Tensor, req_dtype: torch.dtype) -> Tuple[Tensor, bool,
 
     out_dtype = img.dtype
     need_cast = False
-    if out_dtype != req_dtype:
+    if out_dtype not in req_dtypes:
         need_cast = True
+        req_dtype = req_dtypes[0]
         img = img.to(req_dtype)
 
     return img, need_cast, need_squeeze, out_dtype
@@ -899,15 +887,17 @@ def _cast_squeeze_out(img: Tensor, need_cast: bool, need_squeeze: bool, out_dtyp
     if need_squeeze:
         img = img.squeeze(dim=0)
 
     if need_cast:
-        # it is better to round before cast
-        img = torch.round(img).to(out_dtype)
+        if out_dtype in (torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64):
+            # it is better to round before cast
+            img = torch.round(img)
+        img = img.to(out_dtype)
 
     return img
 
 
 def _apply_grid_transform(img: Tensor, grid: Tensor, mode: str) -> Tensor:
-    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, grid.dtype)
+    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [grid.dtype, ])
 
     if img.shape[0] > 1:
         # Apply same grid to a batch of images
@@ -1168,7 +1158,7 @@ def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: List[float]) -> Te
     kernel = _get_gaussian_kernel2d(kernel_size, sigma, dtype=dtype, device=img.device)
     kernel = kernel.expand(img.shape[-3], 1, kernel.shape[0], kernel.shape[1])
 
-    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, kernel.dtype)
+    img, need_cast, need_squeeze, out_dtype = _cast_squeeze_in(img, [kernel.dtype, ])
 
     # padding = (left, right, top, bottom)
     padding = [kernel_size[0] // 2, kernel_size[0] // 2, kernel_size[1] // 2, kernel_size[1] // 2]
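
Below are a few illustrative sketches for the three items in the commit
message. They are not part of the patch.

First item: without `torch.abs`, signed errors cancel under "mean" and
"max" ignores negative deviations entirely. A minimal sketch of the fixed
computation from test/common_utils.py; the helper name `agg_abs_err` is
hypothetical, the aggregation call mirrors the patched line:

```python
import torch

def agg_abs_err(tensor, pil_tensor, agg_method="max"):
    # hypothetical helper mirroring the patched line: aggregate the
    # *absolute* difference, so "mean" yields mean absolute error and
    # "max" yields max absolute error
    return getattr(torch, agg_method)(torch.abs(tensor - pil_tensor)).item()

a = torch.tensor([10.0, 20.0])
b = torch.tensor([12.0, 18.0])
print(agg_abs_err(a, b, "mean"))  # 2.0 (the signed mean would cancel to 0.0)
print(agg_abs_err(a, b, "max"))   # 2.0
```

This is presumably why the hue tolerances change from `tol=0.1,
agg_method="mean"` to `tol=16.1, agg_method="max"`: the test now bounds
the worst single pixel (up to 16 levels on the 0-255 scale) rather than
the average deviation.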
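Second item: a sketch of the refactored resize path, assuming the
torchvision build this patch applies to (`functional_tensor` is an
internal module, and `interpolation=3` is assumed to map to "bicubic" in
its `_interpolation_modes` table):

```python
import torch
from torchvision.transforms import functional_tensor as F_t

# uint8 CHW image: _cast_squeeze_in adds a batch dim and casts to float32,
# interpolate() runs in float, bicubic overshoot is clamped for uint8,
# then _cast_squeeze_out rounds, casts back and drops the batch dim again
img = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
out = F_t.resize(img, size=[64, 64], interpolation=3)  # assumed: 3 -> "bicubic"
assert out.dtype == torch.uint8 and out.shape == (3, 64, 64)
```

Factoring the duplicated cast/unsqueeze bookkeeping out of resize() into
the shared helpers lets resize, _apply_grid_transform and gaussian_blur
use one code path, and the list argument lets resize accept either
float32 or float64 input without a cast.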
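Why resize() clamps when mode is "bicubic" and the source is uint8:
bicubic kernels have negative lobes, so interpolation can overshoot the
input range (ringing). A self-contained demonstration with plain
`torch.nn.functional.interpolate`; the exact overshoot values are
illustrative:

```python
import torch
from torch.nn.functional import interpolate

# a hard 0 -> 255 step edge, upsampled 2x with bicubic
x = torch.tensor([0.0, 0.0, 255.0, 255.0]).view(1, 1, 4, 1)
y = interpolate(x, size=[8, 1], mode="bicubic", align_corners=False)
print(y.min().item(), y.max().item())  # slightly below 0 and above 255
y = y.clamp(min=0, max=255)            # the fix applied in resize()
```

Without the clamp, the overshoot would wrap around when cast back to
uint8 (e.g. a value near -1 becomes 255).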
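Third item: a float-to-integer cast in PyTorch truncates toward zero, so
rounding first gives the nearest representable value. The rounding is now
gated on integer output dtypes because rounding a tensor that is cast
back to a float dtype (e.g. float16) would needlessly quantize it. A
quick illustration:

```python
import torch

x = torch.tensor([0.4, 1.5, 2.5, 254.7])
print(x.to(torch.uint8))               # -> [0, 1, 2, 254]: plain cast truncates
print(torch.round(x).to(torch.uint8))  # -> [0, 2, 2, 255]: round to nearest first
# torch.round uses round-half-to-even, hence 1.5 -> 2 and 2.5 -> 2
```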