From a2304b8dd2fa51ce7ccad1e6386a269b077b7d6b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 17 Jul 2022 21:43:52 +0200 Subject: [PATCH 1/3] REGR: fix pd.cut with datetime IntervalIndex as bins --- pandas/core/indexes/base.py | 5 ++++- pandas/tests/indexes/interval/test_indexing.py | 14 ++++++++++++++ pandas/tests/reshape/test_cut.py | 10 ++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a212da050e1f1..e1141b7109c63 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3981,8 +3981,11 @@ def _should_partial_index(self, target: Index) -> bool: Should we attempt partial-matching indexing? """ if is_interval_dtype(self.dtype): + if is_interval_dtype(target.dtype): + return False # "Index" has no attribute "left" - return self.left._should_compare(target) # type: ignore[attr-defined] + # return self.left._should_compare(target) # type: ignore[attr-defined] + return True return False @final diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index 4cf754a7e52e0..276903b88615c 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -8,6 +8,7 @@ from pandas import ( NA, CategoricalIndex, + DatetimeIndex, Index, Interval, IntervalIndex, @@ -312,6 +313,19 @@ def test_get_indexer_categorical_with_nans(self): expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_datetime(self): + ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4)) + result = ii.get_indexer(DatetimeIndex(["2018-01-02"])) + expected = np.array([0], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + # TODO those should probably be deprecated? 
+ result = ii.get_indexer(DatetimeIndex(["2018-01-02"]).astype(str)) + tm.assert_numpy_array_equal(result, expected) + + result = ii.get_indexer(DatetimeIndex(["2018-01-02"]).asi8) + tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize( "tuples, inclusive", [ diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 815890f319396..5c9407cd5ffc6 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -14,6 +14,7 @@ Timestamp, cut, date_range, + interval_range, isna, qcut, timedelta_range, @@ -739,3 +740,12 @@ def test_cut_with_timestamp_tuple_labels(): expected = Categorical.from_codes([0, 1, 2], labels, ordered=True) tm.assert_categorical_equal(result, expected) + + +def test_cut_bins_datetime_intervalindex(): + # https://github.com/pandas-dev/pandas/issues/46218 + bins = interval_range(Timestamp("2022-02-25"), Timestamp("2022-02-27"), freq="1D") + # passing Series instead of list is important to trigger bug + result = cut(Series([Timestamp("2022-02-26")]), bins=bins) + expected = Categorical.from_codes([0], bins, ordered=True) + tm.assert_categorical_equal(result.array, expected) From 3436a0b9d5c8f6e8153c0e75cc1fac4c9728fa85 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 17 Jul 2022 22:10:59 +0200 Subject: [PATCH 2/3] add whatsnew + update test comment --- doc/source/whatsnew/v1.4.4.rst | 1 + pandas/tests/indexes/interval/test_indexing.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 6ee140f59e096..fd1507fbe8ab9 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -15,6 +15,7 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`) +- Fixed regression in :func:`cut` using a ``datetime64`` IntervalIndex as bins (:issue:`46218`) - .. --------------------------------------------------------------------------- diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index 276903b88615c..67ccb0cd58d6f 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -319,10 +319,11 @@ def test_get_indexer_datetime(self): expected = np.array([0], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) - # TODO those should probably be deprecated? result = ii.get_indexer(DatetimeIndex(["2018-01-02"]).astype(str)) tm.assert_numpy_array_equal(result, expected) + # TODO this should probably be deprecated? + # https://github.com/pandas-dev/pandas/issues/47772 result = ii.get_indexer(DatetimeIndex(["2018-01-02"]).asi8) tm.assert_numpy_array_equal(result, expected) From aa4226861afccecc18c6a2f9378103d6a8307f14 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 12 Aug 2022 13:45:19 +0200 Subject: [PATCH 3/3] add comment --- pandas/core/indexes/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 29e33e6c439c7..b0b8e4c248e9d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3989,6 +3989,9 @@ def _should_partial_index(self, target: Index) -> bool: if is_interval_dtype(self.dtype): if is_interval_dtype(target.dtype): return False + # See https://github.com/pandas-dev/pandas/issues/47772 the commented + # out code can be restored (instead of hardcoding `return True`) + # once that issue is fixed # "Index" has no attribute "left" # return self.left._should_compare(target) # type: ignore[attr-defined] return True