From 978b6270b16abb4d86ab29d61f65258c4cac5fee Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 10 Mar 2021 19:49:37 -0800 Subject: [PATCH 1/4] BUG: better exception on invalid slicing on CategoricalINdex --- pandas/core/indexes/category.py | 7 ------- pandas/tests/indexing/test_categorical.py | 6 +++++- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index a38ef55614638..6af7b1275f10d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -531,13 +531,6 @@ def _convert_list_indexer(self, keyarr): return self.get_indexer_for(keyarr) - @doc(Index._maybe_cast_slice_bound) - def _maybe_cast_slice_bound(self, label, side: str, kind): - if kind == "loc": - return label - - return super()._maybe_cast_slice_bound(label, side, kind) - # -------------------------------------------------------------------- def _is_comparable_dtype(self, dtype): diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index f104587ebbded..11943d353e8c8 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -469,7 +469,11 @@ def test_ix_categorical_index_non_unique(self): def test_loc_slice(self): # GH9748 - with pytest.raises(KeyError, match="1"): + msg = ( + "cannot do slice indexing on CategoricalIndex with these " + r"indexers \[1\] of type int" + ) + with pytest.raises(TypeError, match=msg): self.df.loc[1:5] result = self.df.loc["b":"c"] From 1412cb869839e3925c8a0ca728a72b4a28f2e16b Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 10 Mar 2021 19:52:42 -0800 Subject: [PATCH 2/4] CLN: remove no-longer-used 'kind' keyword --- pandas/core/indexes/base.py | 6 ++---- pandas/core/indexes/datetimes.py | 8 +++----- pandas/core/indexes/interval.py | 4 ++-- pandas/core/indexes/numeric.py | 3 +-- pandas/core/indexes/period.py | 5 +---- pandas/core/indexes/timedeltas.py | 5 +---- pandas/tests/indexes/datetimes/test_indexing.py | 6 +++--- pandas/tests/indexes/period/test_partial_slicing.py | 2 +- 8 files changed, 14 insertions(+), 25 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9543b11ad4de1..ed86062b33617 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5685,7 +5685,7 @@ def _validate_indexer(self, form: str_t, key, kind: str_t): if key is not None and not is_integer(key): raise self._invalid_indexer(form, key) - def _maybe_cast_slice_bound(self, label, side: str_t, kind): + def _maybe_cast_slice_bound(self, label, side: str_t): """ This function should be overloaded in subclasses that allow non-trivial casting on label-slice bounds, e.g. datetime-like indices allowing @@ -5695,7 +5695,6 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind): ---------- label : object side : {'left', 'right'} - kind : {'loc', 'getitem'} or None Returns ------- @@ -5705,7 +5704,6 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind): ----- Value of `side` parameter should be validated in caller. """ - assert kind in ["loc", "getitem", None] # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. Float64Index and @@ -5760,7 +5758,7 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. - label = self._maybe_cast_slice_bound(label, side, kind) + label = self._maybe_cast_slice_bound(label, side) # we need to look up the label try: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ed0856f3d30a3..5f5af7fe0551c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -725,7 +725,7 @@ def _maybe_cast_for_get_loc(self, key) -> Timestamp: key = key.tz_convert(self.tz) return key - def _maybe_cast_slice_bound(self, label, side: str, kind): + def _maybe_cast_slice_bound(self, label, side: str): """ If label is a string, cast it to datetime according to resolution. @@ -733,7 +733,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): ---------- label : object side : {'left', 'right'} - kind : {'loc', 'getitem'} or None Returns ------- @@ -743,7 +742,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): ----- Value of `side` parameter should be validated in caller. """ - assert kind in ["loc", "getitem", None] if isinstance(label, str): freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) @@ -824,12 +822,12 @@ def check_str_or_none(point): mask = np.array(True) deprecation_mask = np.array(True) if start is not None: - start_casted = self._maybe_cast_slice_bound(start, "left", kind) + start_casted = self._maybe_cast_slice_bound(start, "left") mask = start_casted <= self deprecation_mask = start_casted == self if end is not None: - end_casted = self._maybe_cast_slice_bound(end, "right", kind) + end_casted = self._maybe_cast_slice_bound(end, "right") mask = (self <= end_casted) & mask deprecation_mask = (end_casted == self) | deprecation_mask diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 58c5b23d12a35..4f96cdca1161e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -802,8 +802,8 @@ def _should_fallback_to_positional(self) -> bool: # positional in this case return self.dtype.subtype.kind in ["m", "M"] - def _maybe_cast_slice_bound(self, label, side: str, kind): - return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) + def _maybe_cast_slice_bound(self, label, side: str): + return getattr(self, side)._maybe_cast_slice_bound(label, side) @Appender(Index._convert_list_indexer.__doc__) def _convert_list_indexer(self, keyarr): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b6f476d864011..473dfbe40a319 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -114,8 +114,7 @@ def _validate_dtype(cls, dtype: Dtype) -> None: # Indexing Methods @doc(Index._maybe_cast_slice_bound) - def _maybe_cast_slice_bound(self, label, side: str, kind): - assert kind in ["loc", "getitem", None] + def _maybe_cast_slice_bound(self, label, side: str): # we will try to coerce to integers return self._maybe_cast_indexer(label) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b15912e4c477b..2aa9248f652f4 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -532,7 +532,7 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError as err: raise KeyError(orig_key) from err - def _maybe_cast_slice_bound(self, label, side: str, kind: str): + def _maybe_cast_slice_bound(self, label, side: str): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. @@ -541,7 +541,6 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str): ---------- label : object side : {'left', 'right'} - kind : {'loc', 'getitem'} Returns ------- @@ -550,9 +549,7 @@ def _maybe_cast_slice_bound(self, label, side: str, kind: str): Notes ----- Value of `side` parameter should be validated in caller. - """ - assert kind in ["loc", "getitem"] if isinstance(label, datetime): return Period(label, freq=self.freq) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index a23dd10bc3c0e..c7ca963216b8e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -192,7 +192,7 @@ def get_loc(self, key, method=None, tolerance=None): return Index.get_loc(self, key, method, tolerance) - def _maybe_cast_slice_bound(self, label, side: str, kind): + def _maybe_cast_slice_bound(self, label, side: str): """ If label is a string, cast it to timedelta according to resolution. @@ -200,14 +200,11 @@ def _maybe_cast_slice_bound(self, label, side: str, kind): ---------- label : object side : {'left', 'right'} - kind : {'loc', 'getitem'} or None Returns ------- label : object """ - assert kind in ["loc", "getitem", None] - if isinstance(label, str): parsed = Timedelta(label) lbound = parsed.round(parsed.resolution_string) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index d29d4647f4753..0b45df4f79994 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -679,18 +679,18 @@ def test_maybe_cast_slice_bounds_empty(self): # GH#14354 empty_idx = date_range(freq="1H", periods=0, end="2015") - right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right", "loc") + right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right") exp = Timestamp("2015-01-02 23:59:59.999999999") assert right == exp - left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left", "loc") + left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left") exp = Timestamp("2015-01-02 00:00:00") assert left == exp def test_maybe_cast_slice_duplicate_monotonic(self): # https://github.com/pandas-dev/pandas/issues/16515 idx = DatetimeIndex(["2017", "2017"]) - result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc") + result = idx._maybe_cast_slice_bound("2017-01-01", "left") expected = Timestamp("2017-01-01") assert result == expected diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index b0e573250d02e..793878c0217e5 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -110,7 +110,7 @@ def test_maybe_cast_slice_bound(self, make_range, frame_or_series): # Check the lower-level calls are raising where expected. with pytest.raises(TypeError, match=msg): - idx._maybe_cast_slice_bound("foo", "left", "loc") + idx._maybe_cast_slice_bound("foo", "left") with pytest.raises(TypeError, match=msg): idx.get_slice_bound("foo", "left", "loc") From c5dfc5a8a927c443cf6586f22a8577a2d05eda19 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 15:10:42 -0800 Subject: [PATCH 3/4] revert bc dask breaks --- pandas/core/indexes/base.py | 6 ++++-- pandas/core/indexes/datetimes.py | 8 +++++--- pandas/core/indexes/interval.py | 4 ++-- pandas/core/indexes/numeric.py | 3 ++- pandas/core/indexes/period.py | 5 ++++- pandas/core/indexes/timedeltas.py | 5 ++++- pandas/tests/indexes/datetimes/test_indexing.py | 6 +++--- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dcedd9d66e6b9..8b67b98b32f7f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5680,7 +5680,7 @@ def _validate_indexer(self, form: str_t, key, kind: str_t): if key is not None and not is_integer(key): raise self._invalid_indexer(form, key) - def _maybe_cast_slice_bound(self, label, side: str_t): + def _maybe_cast_slice_bound(self, label, side: str_t, kind): """ This function should be overloaded in subclasses that allow non-trivial casting on label-slice bounds, e.g. datetime-like indices allowing @@ -5690,6 +5690,7 @@ def _maybe_cast_slice_bound(self, label, side: str_t): ---------- label : object side : {'left', 'right'} + kind : {'loc', 'getitem'} or None Returns ------- @@ -5699,6 +5700,7 @@ def _maybe_cast_slice_bound(self, label, side: str_t): ----- Value of `side` parameter should be validated in caller. """ + assert kind in ["loc", "getitem", None] # We are a plain index here (sub-class override this method if they # wish to have special treatment for floats/ints, e.g. Float64Index and @@ -5753,7 +5755,7 @@ def get_slice_bound(self, label, side: str_t, kind) -> int: # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. - label = self._maybe_cast_slice_bound(label, side) + label = self._maybe_cast_slice_bound(label, side, kind) # we need to look up the label try: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5f5af7fe0551c..ed0856f3d30a3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -725,7 +725,7 @@ def _maybe_cast_for_get_loc(self, key) -> Timestamp: key = key.tz_convert(self.tz) return key - def _maybe_cast_slice_bound(self, label, side: str): + def _maybe_cast_slice_bound(self, label, side: str, kind): """ If label is a string, cast it to datetime according to resolution. @@ -733,6 +733,7 @@ def _maybe_cast_slice_bound(self, label, side: str): ---------- label : object side : {'left', 'right'} + kind : {'loc', 'getitem'} or None Returns ------- @@ -742,6 +743,7 @@ def _maybe_cast_slice_bound(self, label, side: str): ----- Value of `side` parameter should be validated in caller. """ + assert kind in ["loc", "getitem", None] if isinstance(label, str): freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) @@ -822,12 +824,12 @@ def check_str_or_none(point): mask = np.array(True) deprecation_mask = np.array(True) if start is not None: - start_casted = self._maybe_cast_slice_bound(start, "left") + start_casted = self._maybe_cast_slice_bound(start, "left", kind) mask = start_casted <= self deprecation_mask = start_casted == self if end is not None: - end_casted = self._maybe_cast_slice_bound(end, "right") + end_casted = self._maybe_cast_slice_bound(end, "right", kind) mask = (self <= end_casted) & mask deprecation_mask = (end_casted == self) | deprecation_mask diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 419b22dd45825..86ff95a588217 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -802,8 +802,8 @@ def _should_fallback_to_positional(self) -> bool: # positional in this case return self.dtype.subtype.kind in ["m", "M"] - def _maybe_cast_slice_bound(self, label, side: str): - return getattr(self, side)._maybe_cast_slice_bound(label, side) + def _maybe_cast_slice_bound(self, label, side: str, kind): + return getattr(self, side)._maybe_cast_slice_bound(label, side, kind) @Appender(Index._convert_list_indexer.__doc__) def _convert_list_indexer(self, keyarr): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 473dfbe40a319..b6f476d864011 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -114,7 +114,8 @@ def _validate_dtype(cls, dtype: Dtype) -> None: # Indexing Methods @doc(Index._maybe_cast_slice_bound) - def _maybe_cast_slice_bound(self, label, side: str): + def _maybe_cast_slice_bound(self, label, side: str, kind): + assert kind in ["loc", "getitem", None] # we will try to coerce to integers return self._maybe_cast_indexer(label) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 53fed1f39c5d2..0c5dbec2094e5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -530,7 +530,7 @@ def get_loc(self, key, method=None, tolerance=None): except KeyError as err: raise KeyError(orig_key) from err - def _maybe_cast_slice_bound(self, label, side: str): + def _maybe_cast_slice_bound(self, label, side: str, kind: str): """ If label is a string or a datetime, cast it to Period.ordinal according to resolution. @@ -539,6 +539,7 @@ def _maybe_cast_slice_bound(self, label, side: str): ---------- label : object side : {'left', 'right'} + kind : {'loc', 'getitem'} Returns ------- @@ -547,7 +548,9 @@ def _maybe_cast_slice_bound(self, label, side: str): Notes ----- Value of `side` parameter should be validated in caller. + """ + assert kind in ["loc", "getitem"] if isinstance(label, datetime): return Period(label, freq=self.freq) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c7ca963216b8e..a23dd10bc3c0e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -192,7 +192,7 @@ def get_loc(self, key, method=None, tolerance=None): return Index.get_loc(self, key, method, tolerance) - def _maybe_cast_slice_bound(self, label, side: str): + def _maybe_cast_slice_bound(self, label, side: str, kind): """ If label is a string, cast it to timedelta according to resolution. @@ -200,11 +200,14 @@ def _maybe_cast_slice_bound(self, label, side: str): ---------- label : object side : {'left', 'right'} + kind : {'loc', 'getitem'} or None Returns ------- label : object """ + assert kind in ["loc", "getitem", None] + if isinstance(label, str): parsed = Timedelta(label) lbound = parsed.round(parsed.resolution_string) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 0b45df4f79994..d29d4647f4753 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -679,18 +679,18 @@ def test_maybe_cast_slice_bounds_empty(self): # GH#14354 empty_idx = date_range(freq="1H", periods=0, end="2015") - right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right") + right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right", "loc") exp = Timestamp("2015-01-02 23:59:59.999999999") assert right == exp - left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left") + left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left", "loc") exp = Timestamp("2015-01-02 00:00:00") assert left == exp def test_maybe_cast_slice_duplicate_monotonic(self): # https://github.com/pandas-dev/pandas/issues/16515 idx = DatetimeIndex(["2017", "2017"]) - result = idx._maybe_cast_slice_bound("2017-01-01", "left") + result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc") expected = Timestamp("2017-01-01") assert result == expected From c227de6963fce8e0298729fabfe362a84498195d Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 16:30:37 -0800 Subject: [PATCH 4/4] revert test change --- pandas/tests/indexes/period/test_partial_slicing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 793878c0217e5..b0e573250d02e 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -110,7 +110,7 @@ def test_maybe_cast_slice_bound(self, make_range, frame_or_series): # Check the lower-level calls are raising where expected. with pytest.raises(TypeError, match=msg): - idx._maybe_cast_slice_bound("foo", "left") + idx._maybe_cast_slice_bound("foo", "left", "loc") with pytest.raises(TypeError, match=msg): idx.get_slice_bound("foo", "left", "loc")