From f75fd19195a934508275a00c7fe44881f458c5f3 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 27 Feb 2022 21:01:22 +0800 Subject: [PATCH 01/63] Update frame.py --- pandas/core/frame.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf1988808bbb0..085986792f8f4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5625,6 +5625,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5637,6 +5638,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5649,6 +5651,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5661,6 +5664,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5672,6 +5676,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5684,6 +5689,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5696,6 +5702,7 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5723,6 +5730,10 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. + names : str, tuple or list, default None + Using the given string, rename the DataFrame column which contains the + index data. If the DataFrame has a MultiIndex, this has to be a list or + tuple with length equal to the number of levels. .. versionadded:: 1.5.0 @@ -5841,6 +5852,15 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump + + Using the `names` parameter, it is possible to choose a name for the old index column: + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) @@ -5861,12 +5881,11 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] + + names = self.index.get_default_index_names(names) if isinstance(self.index, MultiIndex): - names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: - default = "index" if "index" not in self else "level_0" - names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) From bf9b3de319bd6123f80ea64f7e6fed7363491dc0 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 27 Feb 2022 21:01:46 +0800 Subject: [PATCH 02/63] Update base.py --- pandas/core/indexes/base.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 108646f8766a6..f368eba5edbf6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1765,6 +1765,30 @@ def _validate_names( return new_names + def get_default_index_names(self, names=None) -> Sequence[str]: + from pandas.core.indexes.multi import MultiIndex + + # if names is not None and not all(isinstance(name, str) for name in names): + # raise ValueError("Names must be a string") + if names is not None: + if isinstance(names, str): + names = [names] + elif isinstance(names, list) and not all( + isinstance(name, str) for name in names + ): + raise ValueError("Names must be a string") + elif not (isinstance(names, list) and not isinstance(names, str)): + raise ValueError("Names must be a string or list") + + if not names: + if isinstance(self, MultiIndex): + names = com.fill_missing_names(self.names) + else: + default = "index" if "index" not in self else "level_0" + names = [default] if self.name is None else [self.name] + + return names + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) From e4665ab53a63d33448b882733c0b6da2d04584bd Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 27 Feb 2022 21:02:19 +0800 Subject: [PATCH 03/63] Update test_reset_index.py --- .../tests/frame/methods/test_reset_index.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 840d0c6e6bdf4..305a715443e8b 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -754,3 +754,33 @@ def test_reset_index_interval_columns_object_cast(): columns=Index(["Year", Interval(0, 1), Interval(1, 2)]), ) tm.assert_frame_equal(result, expected) + + +def test_reset_index_rename(float_frame): + # GH 6878 + rdf = float_frame.reset_index(names="new_name") + exp = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(rdf["new_name"], exp) + + with pytest.raises(ValueError, match="Names must be a string"): + float_frame.reset_index(names=1) + + +def test_reset_index_rename_multiindex(float_frame): + # GH 6878 + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + deleveled2 = stacked.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal( + deleveled["first"], deleveled2["new_first"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["new_second"], check_names=False + ) + + with pytest.raises(ValueError, match="Names must be a string or list"): + stacked.reset_index(names={"first": "new_first", "second": "new_second"}) From bdb010f75119db76c666ee425c58aee46ab9ad81 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 27 Feb 2022 21:05:54 +0800 Subject: [PATCH 04/63] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7e470a51858ce..c7b05b0345c60 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -39,8 +39,7 @@ Other enhancements - :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`45428`) - Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`) - Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`) - -- +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From 7871c05c8dbfdb89b18447dd6c9b6b12d9bae929 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 27 Feb 2022 21:10:07 +0800 Subject: [PATCH 05/63] add --- .../tests/frame/methods/test_reset_index.py | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 305a715443e8b..0817970204c47 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -758,9 +758,9 @@ def test_reset_index_interval_columns_object_cast(): def test_reset_index_rename(float_frame): # GH 6878 - rdf = float_frame.reset_index(names="new_name") - exp = Series(float_frame.index.values, name="new_name") - tm.assert_series_equal(rdf["new_name"], exp) + result = float_frame.reset_index(names="new_name") + expected = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(result["new_name"], expected) with pytest.raises(ValueError, match="Names must be a string"): float_frame.reset_index(names=1) @@ -768,19 +768,16 @@ def test_reset_index_rename(float_frame): def test_reset_index_rename_multiindex(float_frame): # GH 6878 - stacked = float_frame.stack()[::2] - stacked = DataFrame({"foo": stacked, "bar": stacked}) + stacked_df = float_frame.stack()[::2] + stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df}) names = ["first", "second"] - stacked.index.names = names - deleveled = stacked.reset_index() - deleveled2 = stacked.reset_index(names=["new_first", "new_second"]) - tm.assert_series_equal( - deleveled["first"], deleveled2["new_first"], check_names=False - ) - tm.assert_series_equal( - deleveled["second"], deleveled2["new_second"], check_names=False - ) + stacked_df.index.names = names + + result = stacked_df.reset_index() + expected = stacked_df.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) + tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) with pytest.raises(ValueError, match="Names must be a string or list"): - stacked.reset_index(names={"first": "new_first", "second": "new_second"}) + stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) From 0166c9d1eb1e3bae67b38421811591ae263bb62d Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 27 Feb 2022 21:23:54 +0800 Subject: [PATCH 06/63] Update frame.py --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 085986792f8f4..ebc7bcea86902 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5853,7 +5853,8 @@ class max type lion mammal 80.5 run monkey mammal NaN jump - Using the `names` parameter, it is possible to choose a name for the old index column: + Using the `names` parameter, it is possible to choose a name for the old + index column: >>> df.reset_index(names='name') name class max_speed 0 falcon bird 389.0 From cb460d755cbe9a1e7204be9e47f483d4dd85a966 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 27 Feb 2022 22:27:58 +0800 Subject: [PATCH 07/63] Update frame.py --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ebc7bcea86902..81273b762846c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5855,6 +5855,7 @@ class max type Using the `names` parameter, it is possible to choose a name for the old index column: + >>> df.reset_index(names='name') name class max_speed 0 falcon bird 389.0 From 5fe7c1b3cc3d479260bad5f97b3430a3e0b4b5e9 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 27 Feb 2022 22:50:03 +0800 Subject: [PATCH 08/63] pre-commit --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 81273b762846c..37bd50ef0a354 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5853,9 +5853,9 @@ class max type lion mammal 80.5 run monkey mammal NaN jump - Using the `names` parameter, it is possible to choose a name for the old + Using the `names` parameter, it is possible to choose a name for the old index column: - + >>> df.reset_index(names='name') name class max_speed 0 falcon bird 389.0 From 638182aa1635afc098ca57a2d7dc88819faa4695 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Fri, 4 Mar 2022 00:09:34 +0800 Subject: [PATCH 09/63] add --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 37bd50ef0a354..2d4d46d20f5a2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5853,8 +5853,8 @@ class max type lion mammal 80.5 run monkey mammal NaN jump - Using the `names` parameter, it is possible to choose a name for the old - index column: + Using the `names` parameter, it is possible to choose a name for the + old index column: >>> df.reset_index(names='name') name class max_speed From e19d8076a3b484d06ea12302182af0226f82f0c5 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sat, 5 Mar 2022 16:39:01 +0800 Subject: [PATCH 10/63] reset index --- pandas/core/frame.py | 3 ++- pandas/core/indexes/base.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2d4d46d20f5a2..9f91156352f7d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5884,7 +5884,8 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] - names = self.index.get_default_index_names(names) + default = "index" if "index" not in self else "level_0" + names = self.index.get_default_index_names(names, default) if isinstance(self.index, MultiIndex): to_insert = zip(self.index.levels, self.index.codes) else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c95a23b623e92..349c15ee6bc76 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1765,7 +1765,7 @@ def _validate_names( return new_names - def get_default_index_names(self, names=None) -> Sequence[str]: + def get_default_index_names(self, names=None, default=None) -> Sequence[str]: from pandas.core.indexes.multi import MultiIndex # if names is not None and not all(isinstance(name, str) for name in names): @@ -1784,7 +1784,6 @@ def get_default_index_names(self, names=None) -> Sequence[str]: if isinstance(self, MultiIndex): names = com.fill_missing_names(self.names) else: - default = "index" if "index" not in self else "level_0" names = [default] if self.name is None else [self.name] return names From bccb825b96d4dd68b9a58db3ff9940fedb12a59c Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sat, 5 Mar 2022 22:26:56 +0800 Subject: [PATCH 11/63] Docstring and typing validation --- pandas/core/frame.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9f91156352f7d..4c65a1ac3a411 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5853,16 +5853,14 @@ class max type lion mammal 80.5 run monkey mammal NaN jump - Using the `names` parameter, it is possible to choose a name for the - old index column: + Using the `names` parameter, choose a name for the index column: >>> df.reset_index(names='name') - name class max_speed + name class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN - """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From a328d9b162eeafa5427c342da6f20a6a243032fe Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 6 Mar 2022 17:40:18 +0800 Subject: [PATCH 12/63] Update frame.py --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4c65a1ac3a411..022da268ae75d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5730,7 +5730,7 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. - names : str, tuple or list, default None + names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list or tuple with length equal to the number of levels. From 43922332a9c1812e13cd087fcd87f6be55caeac4 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 6 Mar 2022 17:41:10 +0800 Subject: [PATCH 13/63] Update base.py --- pandas/core/indexes/base.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 349c15ee6bc76..c3287c9220bf6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -81,6 +81,7 @@ ensure_int64, ensure_object, ensure_platform_int, + is_array_like, is_bool_dtype, is_categorical_dtype, is_dtype_equal, @@ -1765,20 +1766,29 @@ def _validate_names( return new_names - def get_default_index_names(self, names=None, default=None) -> Sequence[str]: + def get_default_index_names(self, names=None, default=str) -> Sequence[str]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + index names to set + default : str + default name of index + + Raises + ------ + TypeError if names not str or list-like + """ from pandas.core.indexes.multi import MultiIndex - # if names is not None and not all(isinstance(name, str) for name in names): - # raise ValueError("Names must be a string") if names is not None: - if isinstance(names, str): + if isinstance(names, str) or isinstance(names, int): names = [names] - elif isinstance(names, list) and not all( - isinstance(name, str) for name in names - ): - raise ValueError("Names must be a string") - elif not (isinstance(names, list) and not isinstance(names, str)): - raise ValueError("Names must be a string or list") + + if not isinstance(names, list) and names is not None: + raise ValueError("Index names must be int, str or 1-dimensional list") if not names: if isinstance(self, MultiIndex): From 18e36c821f2cced8e36d1eb2d55738caa89b5d6d Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 6 Mar 2022 17:41:23 +0800 Subject: [PATCH 14/63] Update test_reset_index.py --- pandas/tests/frame/methods/test_reset_index.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 0817970204c47..9f5d7d0e1a040 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -762,8 +762,9 @@ def test_reset_index_rename(float_frame): expected = Series(float_frame.index.values, name="new_name") tm.assert_series_equal(result["new_name"], expected) - with pytest.raises(ValueError, match="Names must be a string"): - float_frame.reset_index(names=1) + result = float_frame.reset_index(names=123) + expected = Series(float_frame.index.values, name=123) + tm.assert_series_equal(result[123], expected) def test_reset_index_rename_multiindex(float_frame): @@ -779,5 +780,7 @@ def test_reset_index_rename_multiindex(float_frame): tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) - with pytest.raises(ValueError, match="Names must be a string or list"): + with pytest.raises( + ValueError, match="Index names must be int, str or 1-dimensional list" + ): stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) From 6229168112efcb67c70a5f16a3e257ae8b82a552 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 6 Mar 2022 17:42:13 +0800 Subject: [PATCH 15/63] remove is_array_like --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c3287c9220bf6..71b2ab02a63cd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -81,7 +81,6 @@ ensure_int64, ensure_object, ensure_platform_int, - is_array_like, is_bool_dtype, is_categorical_dtype, is_dtype_equal, From 42bdfda0a9f7ba72ec8745a199a0360d6d42cb23 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 8 Mar 2022 19:55:20 +0800 Subject: [PATCH 16/63] docstring --- pandas/core/frame.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 022da268ae75d..da4f6b5fc1f3f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5855,8 +5855,15 @@ class max type Using the `names` parameter, choose a name for the index column: + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df.reset_index(names='name') - name class max_speed + name class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 From dffa501e46fdde787d5d806a8e2b89e52ebf8078 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 8 Mar 2022 19:56:21 +0800 Subject: [PATCH 17/63] docstring --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index da4f6b5fc1f3f..921353a47b4b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5868,6 +5868,7 @@ class max type 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN + """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From 88565721aa1e36285725df4e333ffc7d860cc2a6 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 8 Mar 2022 23:40:30 +0800 Subject: [PATCH 18/63] docstring --- pandas/core/frame.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b1e3ed3b14898..1c9308df3d101 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5861,19 +5861,17 @@ class max type Using the `names` parameter, choose a name for the index column: >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) - + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) >>> df.reset_index(names='name') - name class max_speed + name class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN - """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From 6b5a76bdfaebae198024d596a5e70e5120731eb0 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Thu, 10 Mar 2022 21:10:39 +0800 Subject: [PATCH 19/63] add none to default --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cf986c381b29d..05a62b4c6f528 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1765,7 +1765,7 @@ def _validate_names( return new_names - def get_default_index_names(self, names=None, default=str) -> Sequence[str]: + def get_default_index_names(self, names=None, default=None) -> Sequence[str]: """ Get names of index. From c79cb09aa2411aa8b63eeeb52d686c5d18612337 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Thu, 10 Mar 2022 22:54:02 +0800 Subject: [PATCH 20/63] remove return type of string --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 05a62b4c6f528..8432496e691be 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1765,7 +1765,7 @@ def _validate_names( return new_names - def get_default_index_names(self, names=None, default=None) -> Sequence[str]: + def get_default_index_names(self, names=None, default=None): """ Get names of index. From 1ccb5ab1d4245e2137942b6d3621b1ee9e91f492 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Fri, 11 Mar 2022 11:35:21 +0800 Subject: [PATCH 21/63] Update frame.py --- pandas/core/frame.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1c9308df3d101..4b61aefbe6b53 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5735,6 +5735,9 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. + + .. versionadded:: 1.5.0 + names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list or From bdf769181522d7b7993bc8133f72ff7dd9d3addc Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Fri, 11 Mar 2022 11:55:09 +0800 Subject: [PATCH 22/63] pre commit --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4b61aefbe6b53..af0a0334a1513 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5735,9 +5735,9 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. - + .. versionadded:: 1.5.0 - + names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list or From dbe2ec34b2c45a76a93ae189671ee9d1acc1cc35 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Fri, 11 Mar 2022 22:34:07 +0800 Subject: [PATCH 23/63] add return type --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8432496e691be..05a62b4c6f528 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1765,7 +1765,7 @@ def _validate_names( return new_names - def get_default_index_names(self, names=None, default=None): + def get_default_index_names(self, names=None, default=None) -> Sequence[str]: """ Get names of index. From 7002639ecd1f721aaf5b4cf39a9602202a235f64 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Fri, 11 Mar 2022 23:26:36 +0800 Subject: [PATCH 24/63] type --- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index af0a0334a1513..aa2cc74c5ec84 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5707,7 +5707,7 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, - names: Hashable | Sequence[Hashable] = None, + names: Hashable | Sequence[Hashable] | None = None, ) -> DataFrame | None: """ Reset the index, or a level of it. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 05a62b4c6f528..e8fca2757057d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1765,7 +1765,9 @@ def _validate_names( return new_names - def get_default_index_names(self, names=None, default=None) -> Sequence[str]: + def get_default_index_names( + self, names: Hashable | Sequence[Hashable] | None = None, default=None + ) -> list[Hashable]: """ Get names of index. From 98865a7e72b3f0444c60112927822a8d281a712c Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sat, 12 Mar 2022 15:37:08 +0800 Subject: [PATCH 25/63] doc --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 32a57d38aafbc..b2f7a83ff4751 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5741,7 +5741,6 @@ def reset_index( Allow duplicate column labels to be created. .. versionadded:: 1.5.0 - names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list or From 27cbb62e9b67b0bff8b4ad9370b4e42cedd48780 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Mon, 14 Mar 2022 21:56:46 +0800 Subject: [PATCH 26/63] doc --- pandas/core/frame.py | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b2f7a83ff4751..e04bd6cf4d8ad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5715,11 +5715,9 @@ def reset_index( ) -> DataFrame | None: """ Reset the index, or a level of it. - Reset the index of the DataFrame, and use the default one instead. If the DataFrame has a MultiIndex, this method can remove one or more levels. - Parameters ---------- level : int, str, tuple, or list, default None @@ -5739,26 +5737,16 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. - - .. versionadded:: 1.5.0 - names : int, str or 1-dimensional list, default None - Using the given string, rename the DataFrame column which contains the - index data. If the DataFrame has a MultiIndex, this has to be a list or - tuple with length equal to the number of levels. - .. versionadded:: 1.5.0 - Returns ------- DataFrame or None DataFrame with the new index or None if ``inplace=True``. - See Also -------- DataFrame.set_index : Opposite of reset_index. DataFrame.reindex : Change to new indices or expand indices. DataFrame.reindex_like : Change to same indices as other DataFrame. - Examples -------- >>> df = pd.DataFrame([('bird', 389.0), @@ -5773,29 +5761,23 @@ class max_speed parrot bird 24.0 lion mammal 80.5 monkey mammal NaN - When we reset the index, the old index is added as a column, and a new sequential index is used: - >>> df.reset_index() index class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN - We can use the `drop` parameter to avoid the old index being added as a column: - >>> df.reset_index(drop=True) class max_speed 0 bird 389.0 1 bird 24.0 2 mammal 80.5 3 mammal NaN - You can also use `reset_index` with `MultiIndex`. - >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), ... ('bird', 'parrot'), ... ('mammal', 'lion'), @@ -5817,9 +5799,7 @@ class name parrot 24.0 fly mammal lion 80.5 run monkey NaN jump - If the index has multiple levels, we can reset a subset of them: - >>> df.reset_index(level='class') class speed species max type @@ -5828,10 +5808,8 @@ class speed species parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - If we are not dropping the index, by default, it is placed in the top level. We can place it in another level: - >>> df.reset_index(level='class', col_level=1) speed species class max type @@ -5840,10 +5818,8 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - When the index is inserted under another level, we can specify under which one with the parameter `col_fill`: - >>> df.reset_index(level='class', col_level=1, col_fill='species') species speed species class max type @@ -5852,9 +5828,7 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - If we specify a nonexistent level for `col_fill`, it is created: - >>> df.reset_index(level='class', col_level=1, col_fill='genus') genus speed species class max type @@ -5863,21 +5837,6 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - - Using the `names` parameter, choose a name for the index column: - - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) - >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From 5efd2eb71b6ceb5eb8a58d9b49145abe9e9ecdfd Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Mon, 14 Mar 2022 22:02:15 +0800 Subject: [PATCH 27/63] doc --- pandas/core/frame.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e04bd6cf4d8ad..029e6fb1801d7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5634,7 +5634,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5647,7 +5646,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5660,7 +5658,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5673,7 +5670,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5685,7 +5681,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5698,7 +5693,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5711,13 +5705,14 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, - names: Hashable | Sequence[Hashable] | None = None, ) -> DataFrame | None: """ Reset the index, or a level of it. + Reset the index of the DataFrame, and use the default one instead. If the DataFrame has a MultiIndex, this method can remove one or more levels. + Parameters ---------- level : int, str, tuple, or list, default None @@ -5737,16 +5732,20 @@ def reset_index( levels are named. If None then the index name is repeated. allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. + .. versionadded:: 1.5.0 + Returns ------- DataFrame or None DataFrame with the new index or None if ``inplace=True``. + See Also -------- DataFrame.set_index : Opposite of reset_index. DataFrame.reindex : Change to new indices or expand indices. DataFrame.reindex_like : Change to same indices as other DataFrame. + Examples -------- >>> df = pd.DataFrame([('bird', 389.0), @@ -5761,23 +5760,29 @@ class max_speed parrot bird 24.0 lion mammal 80.5 monkey mammal NaN + When we reset the index, the old index is added as a column, and a new sequential index is used: + >>> df.reset_index() index class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN + We can use the `drop` parameter to avoid the old index being added as a column: + >>> df.reset_index(drop=True) class max_speed 0 bird 389.0 1 bird 24.0 2 mammal 80.5 3 mammal NaN + You can also use `reset_index` with `MultiIndex`. + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), ... ('bird', 'parrot'), ... ('mammal', 'lion'), @@ -5799,7 +5804,9 @@ class name parrot 24.0 fly mammal lion 80.5 run monkey NaN jump + If the index has multiple levels, we can reset a subset of them: + >>> df.reset_index(level='class') class speed species max type @@ -5808,8 +5815,10 @@ class speed species parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump + If we are not dropping the index, by default, it is placed in the top level. We can place it in another level: + >>> df.reset_index(level='class', col_level=1) speed species class max type @@ -5818,8 +5827,10 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump + When the index is inserted under another level, we can specify under which one with the parameter `col_fill`: + >>> df.reset_index(level='class', col_level=1, col_fill='species') species speed species class max type @@ -5828,7 +5839,9 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump + If we specify a nonexistent level for `col_fill`, it is created: + >>> df.reset_index(level='class', col_level=1, col_fill='genus') genus speed species class max type @@ -5857,12 +5870,12 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] - - default = "index" if "index" not in self else "level_0" - names = self.index.get_default_index_names(names, default) if isinstance(self.index, MultiIndex): + names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: + default = "index" if "index" not in self else "level_0" + names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) From 1148e4681a7e52cfd6e0b8d8431894269b8752d5 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Mon, 14 Mar 2022 22:11:31 +0800 Subject: [PATCH 28/63] frame --- pandas/core/frame.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 029e6fb1801d7..5e5ade702c51a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5634,6 +5634,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5646,6 +5647,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5658,6 +5660,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5670,6 +5673,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5681,6 +5685,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5693,6 +5698,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5705,6 +5711,7 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5870,12 +5877,12 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] + + default = "index" if "index" not in self else "level_0" + names = self.index.get_default_index_names(names, default) if isinstance(self.index, MultiIndex): - names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: - default = "index" if "index" not in self else "level_0" - names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) From e924d06abf1e941e087d65deae3ecb8b898e6c4f Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Mon, 14 Mar 2022 22:53:30 +0800 Subject: [PATCH 29/63] doc --- pandas/core/frame.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e5ade702c51a..5dfe868bad2e2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5740,6 +5740,12 @@ def reset_index( allow_duplicates : bool, optional, default lib.no_default Allow duplicate column labels to be created. + .. versionadded:: 1.5.0 + names : int, str or 1-dimensional list, default None + Using the given string, rename the DataFrame column which contains the + index data. If the DataFrame has a MultiIndex, this has to be a list or + tuple with length equal to the number of levels. + .. versionadded:: 1.5.0 Returns @@ -5857,6 +5863,20 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump + + Using the `names` parameter, choose a name for the index column: + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From db2626ac75c2fc05c22fa7f8f95124a29c662f4c Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Wed, 16 Mar 2022 09:19:24 +0800 Subject: [PATCH 30/63] doc --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5dfe868bad2e2..b6959dde00627 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5741,6 +5741,7 @@ def reset_index( Allow duplicate column labels to be created. .. versionadded:: 1.5.0 + names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list or From 5d2dda2b94b4e8ad2ac61f34efa0b2b10339efa7 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Wed, 16 Mar 2022 16:27:29 +0800 Subject: [PATCH 31/63] pre commit --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 525cdefd688be..2381d231e1b6d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5741,7 +5741,7 @@ def reset_index( Allow duplicate column labels to be created. .. versionadded:: 1.5.0 - + names : int, str or 1-dimensional list, default None Using the given string, rename the DataFrame column which contains the index data. If the DataFrame has a MultiIndex, this has to be a list or @@ -5901,6 +5901,7 @@ class max type default = "index" if "index" not in self else "level_0" names = self.index.get_default_index_names(names, default) + if isinstance(self.index, MultiIndex): to_insert = zip(self.index.levels, self.index.codes) else: From 540e4f94eede120deaa4279e96b24c7006cf93eb Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Wed, 16 Mar 2022 20:27:03 +0800 Subject: [PATCH 32/63] doc --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2381d231e1b6d..9ec3e215def32 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5866,6 +5866,7 @@ class max type monkey mammal NaN jump Using the `names` parameter, choose a name for the index column: + >>> df = pd.DataFrame([('bird', 389.0), ... ('bird', 24.0), ... ('mammal', 80.5), From 4c00430003786a00846dbafdb441597d0fa72791 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sat, 19 Mar 2022 16:52:22 +0800 Subject: [PATCH 33/63] Update frame.py --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 824457aaf5675..137662b478d77 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5871,7 +5871,7 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - + Using the `names` parameter, choose a name for the index column: >>> df = pd.DataFrame([('bird', 389.0), From e30dd1442e9e4a58cc8deec043f019735b86d9af Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sat, 19 Mar 2022 17:26:42 +0800 Subject: [PATCH 34/63] pre commit --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f67bd32325a53..a4dba926a6a7c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5873,7 +5873,7 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - + Using the `names` parameter, choose a name for the index column: >>> df = pd.DataFrame([('bird', 389.0), From 8b61ced124057eb0e36ffa4c9a5153392d77eae2 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sat, 19 Mar 2022 23:00:14 +0800 Subject: [PATCH 35/63] Update frame.py --- pandas/core/frame.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a4dba926a6a7c..80c9d59de6c83 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5877,13 +5877,13 @@ class max type Using the `names` parameter, choose a name for the index column: >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) >>> df.reset_index(names='name') - name class max_speed + name class max_speed 0 falcon bird 389.0 1 parrot bird 24.0 2 lion mammal 80.5 From d3b2444d405b5b2e37850517527372add62673fa Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sat, 19 Mar 2022 23:40:34 +0800 Subject: [PATCH 36/63] Update frame.py --- pandas/core/frame.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 80c9d59de6c83..e9ad2464ce439 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5882,12 +5882,6 @@ class max type ... ('mammal', np.nan)], ... index=['falcon', 'parrot', 'lion', 'monkey'], ... columns=('class', 'max_speed')) - >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From 775bfea07d4c19631a96405f7ff0b785f0f7f497 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 20 Mar 2022 00:10:30 +0800 Subject: [PATCH 37/63] Update frame.py --- pandas/core/frame.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e9ad2464ce439..4ba3fa071e47f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5877,11 +5877,17 @@ class max type Using the `names` parameter, choose a name for the index column: >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From f5a59cde162078d7d1c8105bdd40f6325db4a872 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 20 Mar 2022 00:22:53 +0800 Subject: [PATCH 38/63] pre commit --- pandas/core/frame.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4ba3fa071e47f..73b2f6329808d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5883,11 +5883,11 @@ class max type ... index=['falcon', 'parrot', 'lion', 'monkey'], ... columns=('class', 'max_speed')) >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From d77868f2a686bd919e1c8a4cb34d3fa80e4aa9ca Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 20 Mar 2022 19:34:17 +0800 Subject: [PATCH 39/63] df doc --- pandas/core/frame.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 73b2f6329808d..960c134814a80 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5876,18 +5876,13 @@ class max type Using the `names` parameter, choose a name for the index column: - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) - >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN + >>> df.reset_index(names=['classes', 'names']) + classes names speed species + max type + 0 bird falcon 389.0 fly + 1 bird parrot 24.0 fly + 2 mammal lion 80.5 run + 3 mammal monkey NaN jump """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From a4d5a7636875e6b0aaf6e11a3cebeb2351743c83 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 20 Mar 2022 19:37:28 +0800 Subject: [PATCH 40/63] df doc --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 960c134814a80..4c1e169147f02 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5877,8 +5877,8 @@ class max type Using the `names` parameter, choose a name for the index column: >>> df.reset_index(names=['classes', 'names']) - classes names speed species - max type + classes names speed species + max type 0 bird falcon 389.0 fly 1 bird parrot 24.0 fly 2 mammal lion 80.5 run From 89a0ad7d997f981f1fe417d5d24766467e42315f Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 20 Mar 2022 22:03:49 +0800 Subject: [PATCH 41/63] rst --- doc/source/whatsnew/v1.5.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1d36097c7fbfe..4dfdb7384a01f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -44,9 +44,8 @@ Other enhancements - Implemented a complex-dtype :class:`Index`, passing a complex-dtype array-like to ``pd.Index`` will now retain complex dtype instead of casting to ``object`` (:issue:`45845`) - Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`) - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) -- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) -- +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From d80107d255c201e536d21e00b8269b610fa69dca Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 20 Mar 2022 22:57:13 +0800 Subject: [PATCH 42/63] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 4dfdb7384a01f..2a21343405978 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -45,7 +45,7 @@ Other enhancements - Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`) - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) -- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) +- .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From 247d4904616024fa0666ac64d110a3ff4d6350da Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 20 Mar 2022 23:32:17 +0800 Subject: [PATCH 43/63] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2a21343405978..9499c7a28a1d4 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -45,6 +45,7 @@ Other enhancements - Improved error message in :class:`~pandas.core.window.Rolling` when ``window`` is a frequency and ``NaT`` is in the rolling axis (:issue:`46087`) - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) +- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) - .. --------------------------------------------------------------------------- From ad19646953bdf418122f99af2e4ab6a612ea52f7 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Sun, 20 Mar 2022 23:35:42 +0800 Subject: [PATCH 44/63] Update frame.py --- pandas/core/frame.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4c1e169147f02..bcd2d0e91618e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5793,6 +5793,16 @@ class max_speed 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN + + Using the `names` parameter, it is possible to choose a name for + the old index column: + + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN We can use the `drop` parameter to avoid the old index being added as a column: @@ -5873,16 +5883,6 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - - Using the `names` parameter, choose a name for the index column: - - >>> df.reset_index(names=['classes', 'names']) - classes names speed species - max type - 0 bird falcon 389.0 fly - 1 bird parrot 24.0 fly - 2 mammal lion 80.5 run - 3 mammal monkey NaN jump """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) From 3efafaf4b9211a70c2504aa020b210fbe9b7d3f6 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 20 Mar 2022 23:49:22 +0800 Subject: [PATCH 45/63] pre commit --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bcd2d0e91618e..72468a2396dc6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5793,10 +5793,10 @@ class max_speed 1 parrot bird 24.0 2 lion mammal 80.5 3 monkey mammal NaN - + Using the `names` parameter, it is possible to choose a name for the old index column: - + >>> df.reset_index(names='name') name class max_speed 0 falcon bird 389.0 From d614da068ed7cf35355970365f2c0b4e8e52b049 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 00:00:08 +0800 Subject: [PATCH 46/63] remove doc --- pandas/core/frame.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 72468a2396dc6..ef6f706b5fad8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5794,16 +5794,6 @@ class max_speed 2 lion mammal 80.5 3 monkey mammal NaN - Using the `names` parameter, it is possible to choose a name for - the old index column: - - >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN - We can use the `drop` parameter to avoid the old index being added as a column: From 8af651a16f24089cdb2ec0cebde8a0c54c4b554a Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 01:00:36 +0800 Subject: [PATCH 47/63] add --- pandas/core/frame.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ef6f706b5fad8..aae09e5c11965 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5751,13 +5751,6 @@ def reset_index( .. versionadded:: 1.5.0 - names : int, str or 1-dimensional list, default None - Using the given string, rename the DataFrame column which contains the - index data. If the DataFrame has a MultiIndex, this has to be a list or - tuple with length equal to the number of levels. - - .. versionadded:: 1.5.0 - Returns ------- DataFrame or None @@ -5794,6 +5787,16 @@ class max_speed 2 lion mammal 80.5 3 monkey mammal NaN + Using the `names` parameter, it is possible to choose a name for + the old index column: + + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + We can use the `drop` parameter to avoid the old index being added as a column: From b47adb578f4a32904ee0737adce179ea9c8870a9 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 19:48:15 +0800 Subject: [PATCH 48/63] test remove first --- pandas/core/frame.py | 85 ++++++++++++------- .../tests/frame/methods/test_reset_index.py | 30 ------- 2 files changed, 56 insertions(+), 59 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aae09e5c11965..0013ddf73cddc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -58,6 +58,7 @@ FloatFormatType, FormattersType, Frequency, + IgnoreRaise, IndexKeyFunc, IndexLabel, Level, @@ -4831,17 +4832,61 @@ def reindex(self, *args, **kwargs) -> DataFrame: kwargs.pop("labels", None) return super().reindex(**kwargs) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @overload + def drop( + self, + labels: Hashable | list[Hashable] = ..., + *, + axis: Axis = ..., + index: Hashable | list[Hashable] = ..., + columns: Hashable | list[Hashable] = ..., + level: Level | None = ..., + inplace: Literal[True], + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload + def drop( + self, + labels: Hashable | list[Hashable] = ..., + *, + axis: Axis = ..., + index: Hashable | list[Hashable] = ..., + columns: Hashable | list[Hashable] = ..., + level: Level | None = ..., + inplace: Literal[False] = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame: + ... + + @overload def drop( self, - labels=None, + labels: Hashable | list[Hashable] = ..., + *, + axis: Axis = ..., + index: Hashable | list[Hashable] = ..., + columns: Hashable | list[Hashable] = ..., + level: Level | None = ..., + inplace: bool = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "drop" incompatible with supertype "NDFrame" + # github.com/python/mypy/issues/12387 + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def drop( # type: ignore[override] + self, + labels: Hashable | list[Hashable] = None, axis: Axis = 0, - index=None, - columns=None, + index: Hashable | list[Hashable] = None, + columns: Hashable | list[Hashable] = None, level: Level | None = None, inplace: bool = False, - errors: str = "raise", - ): + errors: IgnoreRaise = "raise", + ) -> DataFrame | None: """ Drop specified labels from rows or columns. @@ -5643,7 +5688,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5656,7 +5700,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5669,7 +5712,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5682,7 +5724,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5694,7 +5735,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5707,7 +5747,6 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., - names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5720,7 +5759,6 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, - names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5787,16 +5825,6 @@ class max_speed 2 lion mammal 80.5 3 monkey mammal NaN - Using the `names` parameter, it is possible to choose a name for - the old index column: - - >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN - We can use the `drop` parameter to avoid the old index being added as a column: @@ -5896,13 +5924,12 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] - - default = "index" if "index" not in self else "level_0" - names = self.index.get_default_index_names(names, default) - if isinstance(self.index, MultiIndex): + names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: + default = "index" if "index" not in self else "level_0" + names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) @@ -11205,7 +11232,7 @@ def where( inplace=False, axis=None, level=None, - errors="raise", + errors: IgnoreRaise = "raise", try_cast=lib.no_default, ): return super().where(cond, other, inplace, axis, level, errors, try_cast) @@ -11220,7 +11247,7 @@ def mask( inplace=False, axis=None, level=None, - errors="raise", + errors: IgnoreRaise = "raise", try_cast=lib.no_default, ): return super().mask(cond, other, inplace, axis, level, errors, try_cast) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 9f5d7d0e1a040..840d0c6e6bdf4 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -754,33 +754,3 @@ def test_reset_index_interval_columns_object_cast(): columns=Index(["Year", Interval(0, 1), Interval(1, 2)]), ) tm.assert_frame_equal(result, expected) - - -def test_reset_index_rename(float_frame): - # GH 6878 - result = float_frame.reset_index(names="new_name") - expected = Series(float_frame.index.values, name="new_name") - tm.assert_series_equal(result["new_name"], expected) - - result = float_frame.reset_index(names=123) - expected = Series(float_frame.index.values, name=123) - tm.assert_series_equal(result[123], expected) - - -def test_reset_index_rename_multiindex(float_frame): - # GH 6878 - stacked_df = float_frame.stack()[::2] - stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df}) - - names = ["first", "second"] - stacked_df.index.names = names - - result = stacked_df.reset_index() - expected = stacked_df.reset_index(names=["new_first", "new_second"]) - tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) - tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) - - with pytest.raises( - ValueError, match="Index names must be int, str or 1-dimensional list" - ): - stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) From 175ab704ca26e497b0d0d40cf63f0fcde9557c5c Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 20:22:14 +0800 Subject: [PATCH 49/63] remove --- pandas/core/indexes/base.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 025111b244783..8b9c537631d94 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1769,40 +1769,6 @@ def _validate_names( return new_names - def get_default_index_names( - self, names: Hashable | Sequence[Hashable] | None = None, default=None - ) -> list[Hashable]: - """ - Get names of index. - - Parameters - ---------- - names : int, str or 1-dimensional list, default None - index names to set - default : str - default name of index - - Raises - ------ - TypeError if names not str or list-like - """ - from pandas.core.indexes.multi import MultiIndex - - if names is not None: - if isinstance(names, str) or isinstance(names, int): - names = [names] - - if not isinstance(names, list) and names is not None: - raise ValueError("Index names must be int, str or 1-dimensional list") - - if not names: - if isinstance(self, MultiIndex): - names = com.fill_missing_names(self.names) - else: - names = [default] if self.name is None else [self.name] - - return names - def _get_names(self) -> FrozenList: return FrozenList((self.name,)) From aa583ee14cf1b7acc7551371fb5b26aee7b764ec Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 20:57:31 +0800 Subject: [PATCH 50/63] test --- pandas/core/indexes/base.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8b9c537631d94..f1740f5c8806e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1715,6 +1715,40 @@ def to_frame( result.index = self return result + def get_default_index_names( + self, names: Hashable | Sequence[Hashable] | None = None, default=None + ) -> list[Hashable]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + index names to set + default : str + default name of index + + Raises + ------ + TypeError if names not str or list-like + """ + from pandas.core.indexes.multi import MultiIndex + + if names is not None: + if isinstance(names, str) or isinstance(names, int): + names = [names] + + if not isinstance(names, list) and names is not None: + raise ValueError("Index names must be int, str or 1-dimensional list") + + if not names: + if isinstance(self, MultiIndex): + names = com.fill_missing_names(self.names) + else: + names = [default] if self.name is None else [self.name] + + return names + # -------------------------------------------------------------------- # Name-Centric Methods From 0111d4d7cf939ef9ca84959594b06c2d3097beac Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 22:44:19 +0800 Subject: [PATCH 51/63] doc --- doc/source/whatsnew/v1.5.0.rst | 10 -- pandas/core/frame.py | 97 ++++++++----------- pandas/core/indexes/base.py | 78 +++++++-------- .../tests/frame/methods/test_reset_index.py | 30 ++++++ 4 files changed, 108 insertions(+), 107 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9c8d5d9272166..7f5b76daf25a8 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -46,7 +46,6 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: @@ -111,13 +110,6 @@ did not have the same index as the input. df.groupby('a', dropna=True).transform(lambda x: x) df.groupby('a', dropna=True).transform('sum') -.. _whatsnew_150.notable_bug_fixes.visualization: - -Styler -^^^^^^ - -- Fix showing "None" as ylabel in :meth:`Series.plot` when not setting ylabel (:issue:`46129`) - .. _whatsnew_150.notable_bug_fixes.notable_bug_fix2: notable_bug_fix2 @@ -350,7 +342,6 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`) - Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`) - Deprecated behavior of method :meth:`DataFrame.quantile`, attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`). -- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`) - .. --------------------------------------------------------------------------- @@ -512,7 +503,6 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) - Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) -- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) - Reshaping diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0013ddf73cddc..73b2f6329808d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -58,7 +58,6 @@ FloatFormatType, FormattersType, Frequency, - IgnoreRaise, IndexKeyFunc, IndexLabel, Level, @@ -4832,61 +4831,17 @@ def reindex(self, *args, **kwargs) -> DataFrame: kwargs.pop("labels", None) return super().reindex(**kwargs) - @overload - def drop( - self, - labels: Hashable | list[Hashable] = ..., - *, - axis: Axis = ..., - index: Hashable | list[Hashable] = ..., - columns: Hashable | list[Hashable] = ..., - level: Level | None = ..., - inplace: Literal[True], - errors: IgnoreRaise = ..., - ) -> None: - ... - - @overload - def drop( - self, - labels: Hashable | list[Hashable] = ..., - *, - axis: Axis = ..., - index: Hashable | list[Hashable] = ..., - columns: Hashable | list[Hashable] = ..., - level: Level | None = ..., - inplace: Literal[False] = ..., - errors: IgnoreRaise = ..., - ) -> DataFrame: - ... - - @overload - def drop( - self, - labels: Hashable | list[Hashable] = ..., - *, - axis: Axis = ..., - index: Hashable | list[Hashable] = ..., - columns: Hashable | list[Hashable] = ..., - level: Level | None = ..., - inplace: bool = ..., - errors: IgnoreRaise = ..., - ) -> DataFrame | None: - ... - - # error: Signature of "drop" incompatible with supertype "NDFrame" - # github.com/python/mypy/issues/12387 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) - def drop( # type: ignore[override] + def drop( self, - labels: Hashable | list[Hashable] = None, + labels=None, axis: Axis = 0, - index: Hashable | list[Hashable] = None, - columns: Hashable | list[Hashable] = None, + index=None, + columns=None, level: Level | None = None, inplace: bool = False, - errors: IgnoreRaise = "raise", - ) -> DataFrame | None: + errors: str = "raise", + ): """ Drop specified labels from rows or columns. @@ -5688,6 +5643,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame: ... @@ -5700,6 +5656,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5712,6 +5669,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5724,6 +5682,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5735,6 +5694,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> None: ... @@ -5747,6 +5707,7 @@ def reset_index( col_level: Hashable = ..., col_fill: Hashable = ..., allow_duplicates: bool | lib.NoDefault = ..., + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: ... @@ -5759,6 +5720,7 @@ def reset_index( col_level: Hashable = 0, col_fill: Hashable = "", allow_duplicates: bool | lib.NoDefault = lib.no_default, + names: Hashable | Sequence[Hashable] = None, ) -> DataFrame | None: """ Reset the index, or a level of it. @@ -5789,6 +5751,13 @@ def reset_index( .. versionadded:: 1.5.0 + names : int, str or 1-dimensional list, default None + Using the given string, rename the DataFrame column which contains the + index data. If the DataFrame has a MultiIndex, this has to be a list or + tuple with length equal to the number of levels. + + .. versionadded:: 1.5.0 + Returns ------- DataFrame or None @@ -5904,6 +5873,21 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump + + Using the `names` parameter, choose a name for the index column: + + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df.reset_index(names='name') + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) @@ -5924,12 +5908,13 @@ class max type if not drop: to_insert: Iterable[tuple[Any, Any | None]] + + default = "index" if "index" not in self else "level_0" + names = self.index.get_default_index_names(names, default) + if isinstance(self.index, MultiIndex): - names = com.fill_missing_names(self.index.names) to_insert = zip(self.index.levels, self.index.codes) else: - default = "index" if "index" not in self else "level_0" - names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) multi_col = isinstance(self.columns, MultiIndex) @@ -11232,7 +11217,7 @@ def where( inplace=False, axis=None, level=None, - errors: IgnoreRaise = "raise", + errors="raise", try_cast=lib.no_default, ): return super().where(cond, other, inplace, axis, level, errors, try_cast) @@ -11247,7 +11232,7 @@ def mask( inplace=False, axis=None, level=None, - errors: IgnoreRaise = "raise", + errors="raise", try_cast=lib.no_default, ): return super().mask(cond, other, inplace, axis, level, errors, try_cast) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f1740f5c8806e..0c77e298d8d69 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -9,7 +9,6 @@ Any, Callable, Hashable, - Iterable, Literal, Sequence, TypeVar, @@ -47,7 +46,6 @@ Dtype, DtypeObj, F, - IgnoreRaise, Shape, npt, ) @@ -1715,40 +1713,6 @@ def to_frame( result.index = self return result - def get_default_index_names( - self, names: Hashable | Sequence[Hashable] | None = None, default=None - ) -> list[Hashable]: - """ - Get names of index. - - Parameters - ---------- - names : int, str or 1-dimensional list, default None - index names to set - default : str - default name of index - - Raises - ------ - TypeError if names not str or list-like - """ - from pandas.core.indexes.multi import MultiIndex - - if names is not None: - if isinstance(names, str) or isinstance(names, int): - names = [names] - - if not isinstance(names, list) and names is not None: - raise ValueError("Index names must be int, str or 1-dimensional list") - - if not names: - if isinstance(self, MultiIndex): - names = com.fill_missing_names(self.names) - else: - names = [default] if self.name is None else [self.name] - - return names - # -------------------------------------------------------------------- # Name-Centric Methods @@ -1803,6 +1767,42 @@ def _validate_names( return new_names + def get_default_index_names( + self, names: Hashable | Sequence[Hashable] | None = None, default=None + ) -> list[Hashable]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + index names to set + default : str + default name of index + + Raises + ------ + TypeError + if names not str or list-like + + """ + from pandas.core.indexes.multi import MultiIndex + + if names is not None: + if isinstance(names, str) or isinstance(names, int): + names = [names] + + if not isinstance(names, list) and names is not None: + raise ValueError("Index names must be int, str or 1-dimensional list") + + if not names: + if isinstance(self, MultiIndex): + names = com.fill_missing_names(self.names) + else: + names = [default] if self.name is None else [self.name] + + return names + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) @@ -6846,11 +6846,7 @@ def insert(self, loc: int, item) -> Index: # TODO(2.0) can use Index instead of self._constructor return self._constructor._with_infer(new_values, name=self.name) - def drop( - self, - labels: Index | np.ndarray | Iterable[Hashable], - errors: IgnoreRaise = "raise", - ) -> Index: + def drop(self, labels, errors: str_t = "raise") -> Index: """ Make new Index with passed list of labels deleted. diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 840d0c6e6bdf4..9f5d7d0e1a040 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -754,3 +754,33 @@ def test_reset_index_interval_columns_object_cast(): columns=Index(["Year", Interval(0, 1), Interval(1, 2)]), ) tm.assert_frame_equal(result, expected) + + +def test_reset_index_rename(float_frame): + # GH 6878 + result = float_frame.reset_index(names="new_name") + expected = Series(float_frame.index.values, name="new_name") + tm.assert_series_equal(result["new_name"], expected) + + result = float_frame.reset_index(names=123) + expected = Series(float_frame.index.values, name=123) + tm.assert_series_equal(result[123], expected) + + +def test_reset_index_rename_multiindex(float_frame): + # GH 6878 + stacked_df = float_frame.stack()[::2] + stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df}) + + names = ["first", "second"] + stacked_df.index.names = names + + result = stacked_df.reset_index() + expected = stacked_df.reset_index(names=["new_first", "new_second"]) + tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) + tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) + + with pytest.raises( + ValueError, match="Index names must be int, str or 1-dimensional list" + ): + stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) From 11085c785ff3c3262cee045a88a26c1c6e0e7185 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 23:20:48 +0800 Subject: [PATCH 52/63] add --- doc/source/whatsnew/v1.5.0.rst | 10 ++++ pandas/core/frame.py | 88 ++++++++++++++++++++++++---------- pandas/core/indexes/base.py | 11 +++-- 3 files changed, 81 insertions(+), 28 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7f5b76daf25a8..9c8d5d9272166 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -46,6 +46,7 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) +- .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: @@ -110,6 +111,13 @@ did not have the same index as the input. df.groupby('a', dropna=True).transform(lambda x: x) df.groupby('a', dropna=True).transform('sum') +.. _whatsnew_150.notable_bug_fixes.visualization: + +Styler +^^^^^^ + +- Fix showing "None" as ylabel in :meth:`Series.plot` when not setting ylabel (:issue:`46129`) + .. _whatsnew_150.notable_bug_fixes.notable_bug_fix2: notable_bug_fix2 @@ -342,6 +350,7 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`) - Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`) - Deprecated behavior of method :meth:`DataFrame.quantile`, attribute ``numeric_only`` will default False. Including datetime/timedelta columns in the result (:issue:`7308`). +- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`) - .. --------------------------------------------------------------------------- @@ -503,6 +512,7 @@ Groupby/resample/rolling - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) - Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) +- Bug in :meth:`GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) - Reshaping diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 73b2f6329808d..760bb29f1d117 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -58,6 +58,7 @@ FloatFormatType, FormattersType, Frequency, + IgnoreRaise, IndexKeyFunc, IndexLabel, Level, @@ -4831,17 +4832,61 @@ def reindex(self, *args, **kwargs) -> DataFrame: kwargs.pop("labels", None) return super().reindex(**kwargs) - @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @overload + def drop( + self, + labels: Hashable | list[Hashable] = ..., + *, + axis: Axis = ..., + index: Hashable | list[Hashable] = ..., + columns: Hashable | list[Hashable] = ..., + level: Level | None = ..., + inplace: Literal[True], + errors: IgnoreRaise = ..., + ) -> None: + ... + + @overload def drop( self, - labels=None, + labels: Hashable | list[Hashable] = ..., + *, + axis: Axis = ..., + index: Hashable | list[Hashable] = ..., + columns: Hashable | list[Hashable] = ..., + level: Level | None = ..., + inplace: Literal[False] = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame: + ... + + @overload + def drop( + self, + labels: Hashable | list[Hashable] = ..., + *, + axis: Axis = ..., + index: Hashable | list[Hashable] = ..., + columns: Hashable | list[Hashable] = ..., + level: Level | None = ..., + inplace: bool = ..., + errors: IgnoreRaise = ..., + ) -> DataFrame | None: + ... + + # error: Signature of "drop" incompatible with supertype "NDFrame" + # github.com/python/mypy/issues/12387 + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + def drop( # type: ignore[override] + self, + labels: Hashable | list[Hashable] = None, axis: Axis = 0, - index=None, - columns=None, + index: Hashable | list[Hashable] = None, + columns: Hashable | list[Hashable] = None, level: Level | None = None, inplace: bool = False, - errors: str = "raise", - ): + errors: IgnoreRaise = "raise", + ) -> DataFrame | None: """ Drop specified labels from rows or columns. @@ -5757,7 +5802,7 @@ def reset_index( tuple with length equal to the number of levels. .. versionadded:: 1.5.0 - + s Returns ------- DataFrame or None @@ -5828,6 +5873,16 @@ class name mammal lion 80.5 run monkey NaN jump + Using the `names` parameter, choose a name for the index column: + + >>> df.reset_index(names=['classes', 'names']) + classes names speed species + max type + 0 bird falcon 389.0 fly + 1 bird parrot 24.0 fly + 2 mammal lion 80.5 run + 3 mammal monkey NaN jump + If the index has multiple levels, we can reset a subset of them: >>> df.reset_index(level='class') @@ -5873,21 +5928,6 @@ class max type parrot bird 24.0 fly lion mammal 80.5 run monkey mammal NaN jump - - Using the `names` parameter, choose a name for the index column: - - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) - >>> df.reset_index(names='name') - name class max_speed - 0 falcon bird 389.0 - 1 parrot bird 24.0 - 2 lion mammal 80.5 - 3 monkey mammal NaN """ inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) @@ -11217,7 +11257,7 @@ def where( inplace=False, axis=None, level=None, - errors="raise", + errors: IgnoreRaise = "raise", try_cast=lib.no_default, ): return super().where(cond, other, inplace, axis, level, errors, try_cast) @@ -11232,7 +11272,7 @@ def mask( inplace=False, axis=None, level=None, - errors="raise", + errors: IgnoreRaise = "raise", try_cast=lib.no_default, ): return super().mask(cond, other, inplace, axis, level, errors, try_cast) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0c77e298d8d69..25d84dd74214c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -9,6 +9,7 @@ Any, Callable, Hashable, + Iterable, Literal, Sequence, TypeVar, @@ -46,6 +47,7 @@ Dtype, DtypeObj, F, + IgnoreRaise, Shape, npt, ) @@ -1772,19 +1774,16 @@ def get_default_index_names( ) -> list[Hashable]: """ Get names of index. - Parameters ---------- names : int, str or 1-dimensional list, default None index names to set default : str default name of index - Raises ------ TypeError if names not str or list-like - """ from pandas.core.indexes.multi import MultiIndex @@ -6846,7 +6845,11 @@ def insert(self, loc: int, item) -> Index: # TODO(2.0) can use Index instead of self._constructor return self._constructor._with_infer(new_values, name=self.name) - def drop(self, labels, errors: str_t = "raise") -> Index: + def drop( + self, + labels: Index | np.ndarray | Iterable[Hashable], + errors: IgnoreRaise = "raise", + ) -> Index: """ Make new Index with passed list of labels deleted. From abfb95d24ad8d4774439185e5c21f6c075a503b9 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 23:24:54 +0800 Subject: [PATCH 53/63] doc --- pandas/core/indexes/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 25d84dd74214c..2000662588404 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1774,12 +1774,14 @@ def get_default_index_names( ) -> list[Hashable]: """ Get names of index. + Parameters ---------- names : int, str or 1-dimensional list, default None index names to set default : str default name of index + Raises ------ TypeError From 0086bd50a985b462abd103e90dcb8a12fe0486ee Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 22 Mar 2022 23:44:53 +0800 Subject: [PATCH 54/63] add --- pandas/core/indexes/base.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2000662588404..1372dab8a8edd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1772,21 +1772,6 @@ def _validate_names( def get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: - """ - Get names of index. - - Parameters - ---------- - names : int, str or 1-dimensional list, default None - index names to set - default : str - default name of index - - Raises - ------ - TypeError - if names not str or list-like - """ from pandas.core.indexes.multi import MultiIndex if names is not None: From 7778d4b2df278cf1e572142559b83ffc6fcab6e0 Mon Sep 17 00:00:00 2001 From: Khor Chean Wei Date: Wed, 23 Mar 2022 00:23:18 +0800 Subject: [PATCH 55/63] Update base.py --- pandas/core/indexes/base.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2000662588404..1372dab8a8edd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1772,21 +1772,6 @@ def _validate_names( def get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: - """ - Get names of index. - - Parameters - ---------- - names : int, str or 1-dimensional list, default None - index names to set - default : str - default name of index - - Raises - ------ - TypeError - if names not str or list-like - """ from pandas.core.indexes.multi import MultiIndex if names is not None: From 142889482da3c33bae6aa57321594efa5fce0356 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Wed, 23 Mar 2022 00:59:48 +0800 Subject: [PATCH 56/63] doc --- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 760bb29f1d117..24d651b09f8a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5802,7 +5802,7 @@ def reset_index( tuple with length equal to the number of levels. .. versionadded:: 1.5.0 - s + Returns ------- DataFrame or None diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1372dab8a8edd..2000662588404 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1772,6 +1772,21 @@ def _validate_names( def get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + index names to set + default : str + default name of index + + Raises + ------ + TypeError + if names not str or list-like + """ from pandas.core.indexes.multi import MultiIndex if names is not None: From d2939c51c09eb640d1514748ee91dfdf78ab5fc3 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Wed, 23 Mar 2022 01:03:30 +0800 Subject: [PATCH 57/63] doc --- pandas/core/indexes/base.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1372dab8a8edd..2000662588404 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1772,6 +1772,21 @@ def _validate_names( def get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: + """ + Get names of index. + + Parameters + ---------- + names : int, str or 1-dimensional list, default None + index names to set + default : str + default name of index + + Raises + ------ + TypeError + if names not str or list-like + """ from pandas.core.indexes.multi import MultiIndex if names is not None: From ca641f6e4623d95fa9d634d0b52772e4346370f2 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Wed, 23 Mar 2022 20:49:48 +0800 Subject: [PATCH 58/63] add methods --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9c8d5d9272166..88567b4cb7986 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -46,7 +46,7 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- +- :class:`Index` now contains ``get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: From 51b07240f1775ae2a514cb1d6daa533cf060ffd0 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Wed, 23 Mar 2022 21:35:16 +0800 Subject: [PATCH 59/63] add --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2000662588404..c4300adaa3bbf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1778,9 +1778,9 @@ def get_default_index_names( Parameters ---------- names : int, str or 1-dimensional list, default None - index names to set + Index names to set. default : str - default name of index + Default name of index. Raises ------ From 83e81a605b3d8763491ac30ee6b467ad505af05b Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 27 Mar 2022 14:06:42 +0800 Subject: [PATCH 60/63] doc --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 86c8ec75e8345..7b4dc95289312 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -47,7 +47,7 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- :class:`Index` now contains ``get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) +- :class:`Index` now contains ``_get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 24d651b09f8a8..e47d968f100ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5950,7 +5950,7 @@ class max type to_insert: Iterable[tuple[Any, Any | None]] default = "index" if "index" not in self else "level_0" - names = self.index.get_default_index_names(names, default) + names = self.index._get_default_index_names(names, default) if isinstance(self.index, MultiIndex): to_insert = zip(self.index.levels, self.index.codes) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c4300adaa3bbf..628a1d8d96af7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1769,7 +1769,7 @@ def _validate_names( return new_names - def get_default_index_names( + def _get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: """ From 774a509db360107037f8a710883c7f301be03392 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 27 Mar 2022 14:59:27 +0800 Subject: [PATCH 61/63] doc --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7b4dc95289312..86c8ec75e8345 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -47,7 +47,7 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- :class:`Index` now contains ``_get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) +- :class:`Index` now contains ``get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e47d968f100ba..24d651b09f8a8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5950,7 +5950,7 @@ class max type to_insert: Iterable[tuple[Any, Any | None]] default = "index" if "index" not in self else "level_0" - names = self.index._get_default_index_names(names, default) + names = self.index.get_default_index_names(names, default) if isinstance(self.index, MultiIndex): to_insert = zip(self.index.levels, self.index.codes) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 628a1d8d96af7..c4300adaa3bbf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1769,7 +1769,7 @@ def _validate_names( return new_names - def _get_default_index_names( + def get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: """ From 2e99a82c42ab9e511bb7ad5aba3238567bf7d2a6 Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Sun, 27 Mar 2022 15:32:43 +0800 Subject: [PATCH 62/63] doc --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 2 +- pandas/core/indexes/base.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 86c8ec75e8345..7b4dc95289312 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -47,7 +47,7 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- :class:`Index` now contains ``get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) +- :class:`Index` now contains ``_get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 24d651b09f8a8..e47d968f100ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5950,7 +5950,7 @@ class max type to_insert: Iterable[tuple[Any, Any | None]] default = "index" if "index" not in self else "level_0" - names = self.index.get_default_index_names(names, default) + names = self.index._get_default_index_names(names, default) if isinstance(self.index, MultiIndex): to_insert = zip(self.index.levels, self.index.codes) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c4300adaa3bbf..628a1d8d96af7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1769,7 +1769,7 @@ def _validate_names( return new_names - def get_default_index_names( + def _get_default_index_names( self, names: Hashable | Sequence[Hashable] | None = None, default=None ) -> list[Hashable]: """ From 9b2deb5f16f2affd314c5fcf2905cf524fa1ca4f Mon Sep 17 00:00:00 2001 From: "chean.wei.khor" Date: Tue, 29 Mar 2022 23:29:57 +0800 Subject: [PATCH 63/63] add test --- doc/source/whatsnew/v1.5.0.rst | 1 - pandas/core/indexes/base.py | 2 +- pandas/tests/frame/methods/test_reset_index.py | 11 ++++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7b4dc95289312..8da84923a979a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -47,7 +47,6 @@ Other enhancements - :class:`Series` and :class:`DataFrame` with ``IntegerDtype`` now supports bitwise operations (:issue:`34463`) - Add ``milliseconds`` field support for :class:`~pandas.DateOffset` (:issue:`43371`) - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) -- :class:`Index` now contains ``_get_default_index_names`` method to get names of index for DataFrame (:issue:`6878`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 628a1d8d96af7..09543cdee56a2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1794,7 +1794,7 @@ def _get_default_index_names( names = [names] if not isinstance(names, list) and names is not None: - raise ValueError("Index names must be int, str or 1-dimensional list") + raise ValueError("Index names must be str or 1-dimensional list") if not names: if isinstance(self, MultiIndex): diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 9f5d7d0e1a040..37431bc291b76 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -780,7 +780,16 @@ def test_reset_index_rename_multiindex(float_frame): tm.assert_series_equal(result["first"], expected["new_first"], check_names=False) tm.assert_series_equal(result["second"], expected["new_second"], check_names=False) + +def test_errorreset_index_rename(float_frame): + # GH 6878 + stacked_df = float_frame.stack()[::2] + stacked_df = DataFrame({"first": stacked_df, "second": stacked_df}) + with pytest.raises( - ValueError, match="Index names must be int, str or 1-dimensional list" + ValueError, match="Index names must be str or 1-dimensional list" ): stacked_df.reset_index(names={"first": "new_first", "second": "new_second"}) + + with pytest.raises(IndexError, match="list index out of range"): + stacked_df.reset_index(names=["new_first"])