From 808b375768b6daaa085e8c4dda4e439844e60378 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:08:17 +0300 Subject: [PATCH 01/13] TST: whitelist corr and cov. Add test --- doc/example.feather | Bin 0 -> 1120 bytes pandas/core/window/common.py | 2 ++ pandas/tests/window/test_base_indexer.py | 38 ++++++++++++++++++++++- 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 doc/example.feather diff --git a/doc/example.feather b/doc/example.feather new file mode 100644 index 0000000000000000000000000000000000000000..38dbf087a1590606ae40aaab16fd2d592b771b0e GIT binary patch literal 1120 zcmZuxyGlbr5S^R5Nz{a}NW>HtK_sM7>|!IaNGdxCSXd;{_yCQdR#tw3l~`I>OA5ci zQbZ6$tgQTs#B+9c!YinJCM9>)^273K{XQhooJB2<^&GtHH$(bNO86J_M!J zyGzt-fVTu_y`vB(l0%(5PA9RI8omTJPF`^*F6IUYC9RG~2~6L$#r{j48ZpBzkgR*? zeK=XyxVoIL>-SgCJxtfJc!iX{qHf5i{ID};{XtI7TL*ORLEuwa*BRfN-m;sO{Zgj$ ztzwsCRRGO(Kw^-p-}PKGd}}=UlwnHzzIVB^>*KSHVAFV==PAS|XXz*6tt_YQ$5f~C oX+L~3>;6heD7rV}T*>^s5KCq8YfagILHpBwHUBLPZTxTf15K=C=l}o! literal 0 HcmV?d00001 diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 082c2f533f3de..4b211258a1859 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -340,6 +340,8 @@ def validate_baseindexer_support(func_name: Optional[str]) -> None: "skew", "kurt", "quantile", + "cov", + "corr", } if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: raise NotImplementedError( diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 15e6a904dd11a..9542c25b4285d 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -82,7 +82,7 @@ def get_window_bounds(self, num_values, min_periods, center, closed): df.rolling(indexer, win_type="boxcar") -@pytest.mark.parametrize("func", ["cov", "corr"]) +@pytest.mark.parametrize("func", []) def test_notimplemented_functions(func): # GH 32865 class CustomIndexer(BaseIndexer): @@ -210,3 +210,39 @@ def test_rolling_forward_skewness(constructor): ] ) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "func,expected", + [ + ("cov", [2.0, 2.0, 2.0, 97.0, 2.0, -93.0, 2.0, 2.0, np.nan, np.nan],), + ( + "corr", + [ + 1.0, + 1.0, + 1.0, + 0.8704775290207161, + 0.018229084250926637, + -0.861357304646493, + 1.0, + 1.0, + np.nan, + np.nan, + ], + ), + ], +) +def test_rolling_forward_cov_corr(func, expected): + values1 = np.arange(10).reshape(-1,1) + values2 = values1 * 2 + values1[5,0] = 100 + values = np.concatenate([values1, values2], axis=1) + + indexer = FixedForwardWindowIndexer(window_size=3) + rolling = DataFrame(values).rolling(window=indexer, min_periods=3) + # We are interested in checking only pairwise covariance / correlation + result = getattr(rolling, func)().loc[(slice(None), 1), 0] + result = result.reset_index(drop=True) + expected = Series(expected) + tm.assert_equal(result, expected) \ No newline at end of file From 3a08839e2b46a76b4f183bee934c45474d509368 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:09:23 +0300 Subject: [PATCH 02/13] CLN: run black --- pandas/tests/window/test_base_indexer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 9542c25b4285d..c69bec82b617f 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -234,9 +234,9 @@ def test_rolling_forward_skewness(constructor): ], ) def test_rolling_forward_cov_corr(func, expected): - values1 = np.arange(10).reshape(-1,1) + values1 = np.arange(10).reshape(-1, 1) values2 = values1 * 2 - values1[5,0] = 100 + values1[5, 0] = 100 values = np.concatenate([values1, values2], axis=1) indexer = FixedForwardWindowIndexer(window_size=3) @@ -245,4 +245,4 @@ def test_rolling_forward_cov_corr(func, expected): result = getattr(rolling, func)().loc[(slice(None), 1), 0] result = result.reset_index(drop=True) expected = Series(expected) - tm.assert_equal(result, expected) \ No newline at end of file + tm.assert_equal(result, expected) From 8281c57e54060e75f8e6a435c7532c6c8a35ef37 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:14:57 +0300 Subject: [PATCH 03/13] TST: set expected Series name to result name --- pandas/tests/window/test_base_indexer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index c69bec82b617f..142818d8789c5 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -245,4 +245,5 @@ def test_rolling_forward_cov_corr(func, expected): result = getattr(rolling, func)().loc[(slice(None), 1), 0] result = result.reset_index(drop=True) expected = Series(expected) + expected.name = result.name tm.assert_equal(result, expected) From 5283c71f4acc326611a89fb6f6fe433c62a80499 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:15:43 +0300 Subject: [PATCH 04/13] BUG: fix rolling calls in _get_cov --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 24130c044d186..ab62888980123 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1653,11 +1653,11 @@ def _get_cov(X, Y): X = X.astype("float64") Y = Y.astype("float64") mean = lambda x: x.rolling( - window, self.min_periods, center=self.center + self.window, self.min_periods, center=self.center ).mean(**kwargs) count = ( (X + Y) - .rolling(window=window, min_periods=0, center=self.center) + .rolling(window=self.window, min_periods=0, center=self.center) .count(**kwargs) ) bias_adj = count / (count - ddof) From 35f029502243baa4013c0b3a02895adfd5ba4e98 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:16:24 +0300 Subject: [PATCH 05/13] BUG: fix rolling calls in _get_corr --- pandas/core/window/rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ab62888980123..9bf15bcb14062 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1790,10 +1790,10 @@ def corr(self, other=None, pairwise=None, **kwargs): def _get_corr(a, b): a = a.rolling( - window=window, min_periods=self.min_periods, center=self.center + window=self.window, min_periods=self.min_periods, center=self.center ) b = b.rolling( - window=window, min_periods=self.min_periods, center=self.center + window=self.window, min_periods=self.min_periods, center=self.center ) return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) From d0b2ca4aed1b22b73c83b7a2e70fd1a1ac58b55d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:19:36 +0300 Subject: [PATCH 06/13] BUG: pass self.window only if BaseIndexer --- pandas/core/window/rolling.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9bf15bcb14062..9c8e24e34043b 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1641,11 +1641,14 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - # GH 16058: offset window - if self.is_freq_type: - window = self.win_freq + if isinstance(self.window, BaseIndexer): + window = self.window else: - window = self._get_window(other) + # GH 16058: offset window + if self.is_freq_type: + window = self.win_freq + else: + window = self._get_window(other) def _get_cov(X, Y): # GH #12373 : rolling functions error on float32 data @@ -1653,11 +1656,11 @@ def _get_cov(X, Y): X = X.astype("float64") Y = Y.astype("float64") mean = lambda x: x.rolling( - self.window, self.min_periods, center=self.center + window, self.min_periods, center=self.center ).mean(**kwargs) count = ( (X + Y) - .rolling(window=self.window, min_periods=0, center=self.center) + .rolling(window=window, min_periods=0, center=self.center) .count(**kwargs) ) bias_adj = count / (count - ddof) @@ -1786,14 +1789,18 @@ def corr(self, other=None, pairwise=None, **kwargs): # only default unset pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - window = self._get_window(other) if not self.is_freq_type else self.win_freq + + if isinstance(self.window, BaseIndexer): + window = self.window + else: + window = self._get_window(other) if not self.is_freq_type else self.win_freq def _get_corr(a, b): a = a.rolling( - window=self.window, min_periods=self.min_periods, center=self.center + window=window, min_periods=self.min_periods, center=self.center ) b = b.rolling( - window=self.window, min_periods=self.min_periods, center=self.center + window=window, min_periods=self.min_periods, center=self.center ) return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) From e24c64f81322ae4fa1e9abf5d0c902ad22daea49 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:21:17 +0300 Subject: [PATCH 07/13] DOC: add whatsnew, clean up NotImplemented whatsnew --- doc/source/whatsnew/v1.1.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 845f7773c263c..0e87b319add6b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -175,8 +175,7 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``cov``, ``corr`` will now raise a ``NotImplementedError`` (:issue:`32865`) -- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) +- Using a :func:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :func:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) - Added a :func:`pandas.api.indexers.FixedForwardWindowIndexer` class to support forward-looking windows during ``rolling`` operations. - From 708cfe684778f38cfb679912f7459741001bd23d Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sun, 26 Apr 2020 11:35:05 +0300 Subject: [PATCH 08/13] Revert "ERR: Raise NotImplementedError with BaseIndexer and certain rolling operations (#33057)" This reverts commit 2bf54a85b2efd462bf8cf50ea9d33dd1952de4f3. --- pandas/core/window/common.py | 24 ------------------------ pandas/core/window/rolling.py | 12 ++---------- pandas/tests/window/test_base_indexer.py | 13 ------------- 3 files changed, 2 insertions(+), 47 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 4b211258a1859..12b73646e14bf 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -324,27 +324,3 @@ def func(arg, window, min_periods=None): return cfunc(arg, window, min_periods) return func - - -def validate_baseindexer_support(func_name: Optional[str]) -> None: - # GH 32865: These functions work correctly with a BaseIndexer subclass - BASEINDEXER_WHITELIST = { - "count", - "min", - "max", - "mean", - "sum", - "median", - "std", - "var", - "skew", - "kurt", - "quantile", - "cov", - "corr", - } - if isinstance(func_name, str) and func_name not in BASEINDEXER_WHITELIST: - raise NotImplementedError( - f"{func_name} is not supported with using a BaseIndexer " - f"subclasses. You can use .apply() with {func_name}." - ) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 9c8e24e34043b..3143f1bf2425a 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -48,7 +48,6 @@ calculate_center_offset, calculate_min_periods, get_weighted_roll_func, - validate_baseindexer_support, zsqrt, ) from pandas.core.window.indexers import ( @@ -393,12 +392,11 @@ def _get_cython_func_type(self, func: str) -> Callable: return self._get_roll_func(f"{func}_variable") return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) - def _get_window_indexer(self, window: int, func_name: Optional[str]) -> BaseIndexer: + def _get_window_indexer(self, window: int) -> BaseIndexer: """ Return an indexer class that will compute the window start and end bounds """ if isinstance(self.window, BaseIndexer): - validate_baseindexer_support(func_name) return self.window if self.is_freq_type: return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) @@ -444,7 +442,7 @@ def _apply( blocks, obj = self._create_blocks() block_list = list(blocks) - window_indexer = self._get_window_indexer(window, name) + window_indexer = self._get_window_indexer(window) results = [] exclude: List[Scalar] = [] @@ -1632,9 +1630,6 @@ def quantile(self, quantile, interpolation="linear", **kwargs): """ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): - if isinstance(self.window, BaseIndexer): - validate_baseindexer_support("cov") - if other is None: other = self._selected_obj # only default unset @@ -1781,9 +1776,6 @@ def _get_cov(X, Y): ) def corr(self, other=None, pairwise=None, **kwargs): - if isinstance(self.window, BaseIndexer): - validate_baseindexer_support("corr") - if other is None: other = self._selected_obj # only default unset diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index 142818d8789c5..df58028dee862 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -82,19 +82,6 @@ def get_window_bounds(self, num_values, min_periods, center, closed): df.rolling(indexer, win_type="boxcar") -@pytest.mark.parametrize("func", []) -def test_notimplemented_functions(func): - # GH 32865 - class CustomIndexer(BaseIndexer): - def get_window_bounds(self, num_values, min_periods, center, closed): - return np.array([0, 1]), np.array([1, 2]) - - df = DataFrame({"values": range(2)}) - indexer = CustomIndexer() - with pytest.raises(NotImplementedError, match=f"{func} is not supported"): - getattr(df.rolling(indexer), func)() - - @pytest.mark.parametrize("constructor", [Series, DataFrame]) @pytest.mark.parametrize( "func,np_func,expected,np_kwargs", From 55d6f7db3821954ed372a0d71c93a985cd682993 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 27 Apr 2020 09:17:02 +0300 Subject: [PATCH 09/13] CLN: remove example.feather --- doc/example.feather | Bin 1120 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 doc/example.feather diff --git a/doc/example.feather b/doc/example.feather deleted file mode 100644 index 38dbf087a1590606ae40aaab16fd2d592b771b0e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1120 zcmZuxyGlbr5S^R5Nz{a}NW>HtK_sM7>|!IaNGdxCSXd;{_yCQdR#tw3l~`I>OA5ci zQbZ6$tgQTs#B+9c!YinJCM9>)^273K{XQhooJB2<^&GtHH$(bNO86J_M!J zyGzt-fVTu_y`vB(l0%(5PA9RI8omTJPF`^*F6IUYC9RG~2~6L$#r{j48ZpBzkgR*? zeK=XyxVoIL>-SgCJxtfJc!iX{qHf5i{ID};{XtI7TL*ORLEuwa*BRfN-m;sO{Zgj$ ztzwsCRRGO(Kw^-p-}PKGd}}=UlwnHzzIVB^>*KSHVAFV==PAS|XXz*6tt_YQ$5f~C oX+L~3>;6heD7rV}T*>^s5KCq8YfagILHpBwHUBLPZTxTf15K=C=l}o! From 1fc9e922b7fbcd1b04d271bc09a87ee565c36caa Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 27 Apr 2020 10:01:10 +0300 Subject: [PATCH 10/13] DOC: add comments to cov and corr --- pandas/core/window/rolling.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 3143f1bf2425a..560518cb72058 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1636,6 +1636,10 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) + # GH 32865. We leverage rolling.mean() to calculate covariance, + # so we need to construct those rolling objects with the same + # data that was used when constructing self: window width, + # frequency data or a BaseIndexer subclass if isinstance(self.window, BaseIndexer): window = self.window else: @@ -1782,6 +1786,10 @@ def corr(self, other=None, pairwise=None, **kwargs): pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) + # GH 32865. We leverage rolling.mean() to calculate covariance, + # so we need to construct those rolling objects with the same + # data that was used when constructing self: window width, + # frequency data or a BaseIndexer subclass if isinstance(self.window, BaseIndexer): window = self.window else: From 0d0c028162e5e2156eb12da832c5d8a39bdb2727 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 27 Apr 2020 10:09:12 +0300 Subject: [PATCH 11/13] DOC: rephrase comments --- pandas/core/window/rolling.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 560518cb72058..6c775953e18db 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1636,10 +1636,9 @@ def cov(self, other=None, pairwise=None, ddof=1, **kwargs): pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - # GH 32865. We leverage rolling.mean() to calculate covariance, - # so we need to construct those rolling objects with the same - # data that was used when constructing self: window width, - # frequency data or a BaseIndexer subclass + # GH 32865. We leverage rolling.mean, so we pass + # to the rolling constructors the data used when constructing self: + # window width, frequency data, or a BaseIndexer subclass if isinstance(self.window, BaseIndexer): window = self.window else: @@ -1786,10 +1785,9 @@ def corr(self, other=None, pairwise=None, **kwargs): pairwise = True if pairwise is None else pairwise other = self._shallow_copy(other) - # GH 32865. We leverage rolling.mean() to calculate covariance, - # so we need to construct those rolling objects with the same - # data that was used when constructing self: window width, - # frequency data or a BaseIndexer subclass + # GH 32865. We leverage rolling.cov and rolling.std here, so we pass + # to the rolling constructors the data used when constructing self: + # window width, frequency data, or a BaseIndexer subclass if isinstance(self.window, BaseIndexer): window = self.window else: From 4bf564a72fc66a9ed0aa8615b220e66cab6b44fc Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 27 Apr 2020 10:51:57 +0300 Subject: [PATCH 12/13] restart tests From 771c2bcc378f2c228c00ac1bc048a6bf834c2236 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Mon, 27 Apr 2020 12:40:15 +0300 Subject: [PATCH 13/13] restart tests2