diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9a6455d4d012f..90a2ccf9ab202 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -94,6 +94,50 @@ Other API changes Deprecations ~~~~~~~~~~~~ + +.. _whatsnew_150.deprecations.int_slicing_series: + +In a future version, integer slicing on a :class:`Series` with an :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`). + +For example: + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11]) + +In the old behavior, ``ser[2:4]`` treats the slice as positional: + +*Old behavior*: + +.. code-block:: ipython + + In [3]: ser[2:4] + Out[3]: + 5 3 + 7 4 + dtype: int64 + +In a future version, this will be treated as label-based: + +*Future behavior*: + +.. code-block:: ipython + + In [4]: ser.loc[2:4] + Out[4]: + 2 1 + 3 2 + dtype: int64 + +To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior, +use ``series.loc[i:j]``. + +Slicing on a :class:`DataFrame` will not be affected. + +.. 
_whatsnew_150.deprecations.other: + +Other Deprecations +^^^^^^^^^^^^^^^^^^ - Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`) - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 95d9ed7adc360..517c997779abd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -110,6 +110,7 @@ ABCDatetimeIndex, ABCMultiIndex, ABCPeriodIndex, + ABCRangeIndex, ABCSeries, ABCTimedeltaIndex, ) @@ -3989,7 +3990,7 @@ def _validate_positional_slice(self, key: slice) -> None: self._validate_indexer("positional", key.stop, "iloc") self._validate_indexer("positional", key.step, "iloc") - def _convert_slice_indexer(self, key: slice, kind: str_t): + def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False): """ Convert a slice indexer. @@ -4000,6 +4001,9 @@ def _convert_slice_indexer(self, key: slice, kind: str_t): ---------- key : label of the slice bound kind : {'loc', 'getitem'} + is_frame : bool, default False + Whether this is a slice called on DataFrame.__getitem__ + as opposed to Series.__getitem__ """ assert kind in ["loc", "getitem"], kind @@ -4020,7 +4024,44 @@ def is_int(v): called from the getitem slicers, validate that we are in fact integers """ + if self.is_integer(): + if is_frame: + # unambiguously positional, no deprecation + pass + elif start is None and stop is None: + # label-based vs positional is irrelevant + pass + elif isinstance(self, ABCRangeIndex) and self._range == range( + len(self) + ): + # In this case there is no difference between label-based + # and positional, so nothing will change. + pass + elif ( + self.dtype.kind in ["i", "u"] + and self._is_strictly_monotonic_increasing + and len(self) > 0 + and self[0] == 0 + and self[-1] == len(self) - 1 + ): + # We are range-like, e.g. 
created with Index(np.arange(N)) + pass + elif not is_index_slice: + # we're going to raise, so don't bother warning, e.g. + # test_integer_positional_indexing + pass + else: + warnings.warn( + "The behavior of `series[i:j]` with an integer-dtype index " + "is deprecated. In a future version, this will be treated " + "as *label-based* indexing, consistent with e.g. `series[i]` " + "lookups. To retain the old behavior, use `series.iloc[i:j]`. " + "To get the future behavior, use `series.loc[i:j]`.", + FutureWarning, + stacklevel=find_stack_level(), + ) if self.is_integer() or is_index_slice: + # Note: these checks are redundant if we know is_index_slice self._validate_indexer("slice", key.start, "getitem") self._validate_indexer("slice", key.stop, "getitem") self._validate_indexer("slice", key.step, "getitem") diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index a378fd95b9c03..5250f19c839bf 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -754,7 +754,7 @@ def _index_as_unique(self) -> bool: "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique" ) - def _convert_slice_indexer(self, key: slice, kind: str): + def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False): if not (key.step is None or key.step == 1): # GH#31658 if label-based, we require step == 1, # if positional, we disallow float start/stop @@ -766,7 +766,7 @@ def _convert_slice_indexer(self, key: slice, kind: str): # i.e. 
this cannot be interpreted as a positional slice raise ValueError(msg) - return super()._convert_slice_indexer(key, kind) + return super()._convert_slice_indexer(key, kind, is_frame=is_frame) @cache_readonly def _should_fallback_to_positional(self) -> bool: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index fa32953c38cb0..3f736d9a62aa1 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -268,7 +268,7 @@ def _should_fallback_to_positional(self) -> bool: return False @doc(Index._convert_slice_indexer) - def _convert_slice_indexer(self, key: slice, kind: str): + def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False): if is_float_dtype(self.dtype): assert kind in ["loc", "getitem"] @@ -276,7 +276,7 @@ def _convert_slice_indexer(self, key: slice, kind: str): # translate to locations return self.slice_indexer(key.start, key.stop, key.step) - return super()._convert_slice_indexer(key, kind=kind) + return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame) @doc(Index._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 77482cbc88bf5..0bf6f22c2d385 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2313,7 +2313,7 @@ def convert_to_index_sliceable(obj: DataFrame, key): """ idx = obj.index if isinstance(key, slice): - return idx._convert_slice_indexer(key, kind="getitem") + return idx._convert_slice_indexer(key, kind="getitem", is_frame=True) elif isinstance(key, str): diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 71e1e61b50256..e966d4602a02c 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -313,7 +313,8 @@ def test_get(self, data): expected = s.iloc[[2, 3]] self.assert_series_equal(result, expected) - result = s.get(slice(2)) + with 
tm.assert_produces_warning(FutureWarning, match="label-based"): + result = s.get(slice(2)) expected = s.iloc[[0, 1]] self.assert_series_equal(result, expected) @@ -336,7 +337,9 @@ def test_get(self, data): # GH 21257 s = pd.Series(data) - s2 = s[::2] + with tm.assert_produces_warning(None): + # GH#45324 make sure we aren't giving a spurious FutureWarning + s2 = s[::2] assert s2.get(1) is None def test_take_sequence(self, data): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 8f9caf1cd13aa..6a4620305523e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1009,7 +1009,7 @@ def test_iloc_row_slice_view(self, using_array_manager): exp_col = original[2].copy() # TODO(ArrayManager) verify it is expected that the original didn't change if not using_array_manager: - exp_col[4:8] = 0.0 + exp_col._values[4:8] = 0.0 tm.assert_series_equal(df[2], exp_col) def test_iloc_col(self): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 22a4ce327c150..72a86b8ad946d 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -367,11 +367,11 @@ def test_apply_frame_not_as_index_column_name(df): def test_apply_frame_concat_series(): def trans(group): - return group.groupby("B")["C"].sum().sort_values()[:2] + return group.groupby("B")["C"].sum().sort_values().iloc[:2] def trans2(group): grouped = group.groupby(df.reindex(group.index)["B"]) - return grouped.sum().sort_values()[:2] + return grouped.sum().sort_values().iloc[:2] df = DataFrame( { @@ -409,7 +409,7 @@ def test_apply_chunk_view(): # Low level tinkering could be unsafe, make sure not df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) - result = df.groupby("key", group_keys=False).apply(lambda x: x[:2]) + result = df.groupby("key", group_keys=False).apply(lambda x: x.iloc[:2]) expected = df.take([0, 1, 3, 4, 6, 7]) 
tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index af308379cba5e..ebf5f9a1dc696 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -657,7 +657,7 @@ def test_uint_index_does_not_convert_to_float64(box): ) tm.assert_index_equal(result.index, expected) - tm.assert_equal(result, series[:3]) + tm.assert_equal(result, series.iloc[:3]) def test_float64_index_equals(): diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 902bd943584d9..cefb3aee861b8 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -343,7 +343,8 @@ def test_integer_positional_indexing(self, idx): """ s = Series(range(2, 6), index=range(2, 6)) - result = s[2:4] + with tm.assert_produces_warning(FutureWarning, match="label-based"): + result = s[2:4] expected = s.iloc[2:4] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index a3b876089994b..e028f6f293c62 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1386,8 +1386,10 @@ def test_iloc(self): tm.assert_series_equal(result, expected) # test slice is a view - result[:] = 0 - assert (ser[1:3] == 0).all() + with tm.assert_produces_warning(None): + # GH#45324 make sure we aren't giving a spurious FutureWarning + result[:] = 0 + assert (ser.iloc[1:3] == 0).all() # list of integers result = ser.iloc[[0, 2, 3, 4, 5]] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 5dc065ca50063..c91a5c2764b34 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -68,7 +68,8 @@ def test_setitem_ndarray_1d_2(self): msg = "Must have equal len keys and value when setting with an iterable" with pytest.raises(ValueError, 
match=msg): - df[2:5] = np.arange(1, 4) * 1j + with tm.assert_produces_warning(FutureWarning, match="label-based"): + df[2:5] = np.arange(1, 4) * 1j def test_getitem_ndarray_3d( self, index, frame_or_series, indexer_sli, using_array_manager diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py index e8034bd4f7160..1a54796dbeec3 100644 --- a/pandas/tests/series/indexing/test_get.py +++ b/pandas/tests/series/indexing/test_get.py @@ -167,7 +167,8 @@ def test_get_with_ea(arr): expected = ser.iloc[[2, 3]] tm.assert_series_equal(result, expected) - result = ser.get(slice(2)) + with tm.assert_produces_warning(FutureWarning, match="label-based"): + result = ser.get(slice(2)) expected = ser.iloc[[0, 1]] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 0da376ccac450..111c1652240e2 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -332,7 +332,8 @@ def test_getitem_slice_bug(self): def test_getitem_slice_integers(self): ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) - result = ser[:4] + with tm.assert_produces_warning(FutureWarning, match="label-based"): + result = ser[:4] expected = Series(ser.values[:4], index=[2, 4, 6, 8]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index c4e5164206126..e1af2508af773 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -215,9 +215,15 @@ def test_setitem_slice(self): def test_setitem_slice_integers(self): ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) - ser[:4] = 0 - assert (ser[:4] == 0).all() - assert not (ser[4:] == 0).any() + msg = r"In a future version, this will be treated as \*label-based\* indexing" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + ser[:4] = 0 + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + assert (ser[:4] == 0).all() + with tm.assert_produces_warning(FutureWarning, match=msg): + assert not (ser[4:] == 0).any() def test_setitem_slicestep(self): # caught this bug when writing tests diff --git a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py index 2bdeb4da5f70f..8e8c33619d564 100644 --- a/pandas/tests/series/methods/test_item.py +++ b/pandas/tests/series/methods/test_item.py @@ -55,5 +55,5 @@ def test_item(self): # Case where ser[0] would not work ser = Series(dti, index=[5, 6]) - val = ser[:1].item() + val = ser.iloc[:1].item() assert val == dti[0] diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index 886ed676dabb2..8e690a677aa98 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -214,7 +214,8 @@ def test_center(raw): expected = ( concat([obj, Series([np.NaN] * 9)]) .rolling(20, min_periods=15) - .apply(f, raw=raw)[9:] + .apply(f, raw=raw) + .iloc[9:] .reset_index(drop=True) ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 23c3a0ef27fef..b1e8b43258750 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -552,11 +552,11 @@ def test_ew_min_periods(min_periods, name): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_ewm_corr_cov(name): - A = Series(np.random.randn(50), index=np.arange(50)) + A = Series(np.random.randn(50), index=range(50)) B = A[2:] + np.random.randn(48) A[:10] = np.NaN - B[-10:] = np.NaN + B.iloc[-10:] = np.NaN result = getattr(A.ewm(com=20, min_periods=5), name)(B) assert np.isnan(result.values[:14]).all() @@ -567,11 +567,11 @@ def test_ewm_corr_cov(name): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_ewm_corr_cov_min_periods(name, 
min_periods): # GH 7898 - A = Series(np.random.randn(50), index=np.arange(50)) + A = Series(np.random.randn(50), index=range(50)) B = A[2:] + np.random.randn(48) A[:10] = np.NaN - B[-10:] = np.NaN + B.iloc[-10:] = np.NaN result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B) # binary functions (ewmcov, ewmcorr) with bias=False require at @@ -593,7 +593,7 @@ def test_ewm_corr_cov_min_periods(name, min_periods): @pytest.mark.parametrize("name", ["cov", "corr"]) def test_different_input_array_raise_exception(name): - A = Series(np.random.randn(50), index=np.arange(50)) + A = Series(np.random.randn(50), index=range(50)) A[:10] = np.NaN msg = "other must be a DataFrame or Series" diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 814bd6b998182..31dbcdfce44e7 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1391,7 +1391,7 @@ def test_rolling_corr_timedelta_index(index, window): # GH: 31286 x = Series([1, 2, 3, 4, 5], index=index) y = x.copy() - x[0:2] = 0.0 + x.iloc[0:2] = 0.0 result = x.rolling(window).corr(y) expected = Series([np.nan, np.nan, 1, 1, 1], index=index) tm.assert_almost_equal(result, expected) diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py index c788b3d88cb63..842c056806092 100644 --- a/pandas/tests/window/test_rolling_functions.py +++ b/pandas/tests/window/test_rolling_functions.py @@ -247,9 +247,13 @@ def test_center(roll_func, kwargs, minp): result = getattr(obj.rolling(20, min_periods=minp, center=True), roll_func)( **kwargs ) - expected = getattr( - concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func - )(**kwargs)[9:].reset_index(drop=True) + expected = ( + getattr( + concat([obj, Series([np.NaN] * 9)]).rolling(20, min_periods=minp), roll_func + )(**kwargs) + .iloc[9:] + .reset_index(drop=True) + ) tm.assert_series_equal(result, expected) diff --git 
a/pandas/tests/window/test_rolling_quantile.py b/pandas/tests/window/test_rolling_quantile.py index 56681c2aaa57e..56b79097a1d05 100644 --- a/pandas/tests/window/test_rolling_quantile.py +++ b/pandas/tests/window/test_rolling_quantile.py @@ -133,7 +133,8 @@ def test_center(q): expected = ( concat([obj, Series([np.NaN] * 9)]) .rolling(20) - .quantile(q)[9:] + .quantile(q) + .iloc[9:] .reset_index(drop=True) ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py index 2c275ed6f4a28..46b7eb6cbc285 100644 --- a/pandas/tests/window/test_rolling_skew_kurt.py +++ b/pandas/tests/window/test_rolling_skew_kurt.py @@ -129,9 +129,11 @@ def test_center(roll_func): obj[-10:] = np.NaN result = getattr(obj.rolling(20, center=True), roll_func)() - expected = getattr(concat([obj, Series([np.NaN] * 9)]).rolling(20), roll_func)()[ - 9: - ].reset_index(drop=True) + expected = ( + getattr(concat([obj, Series([np.NaN] * 9)]).rolling(20), roll_func)() + .iloc[9:] + .reset_index(drop=True) + ) tm.assert_series_equal(result, expected)