diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 232f879285543..3dfb0f70b8142 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,6 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b2b6e02e908c5..ca381160de0df 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1115,17 +1115,21 @@ def to_series(self, index=None, name=None): return Series(self._to_embed(), index=index, name=name) - def to_frame(self, index=True): + def to_frame(self, index=True, name=None): """ Create a DataFrame with a column containing the Index. .. versionadded:: 0.21.0 Parameters ---------- index : boolean, default True Set the index of the returned DataFrame as the original Index. + name : object, default None + The passed name should substitute for the index name (if it has + one). 
+ Returns ------- DataFrame @@ -1153,10 +1157,19 @@ def to_frame(self, index=True): 0 Ant 1 Bear 2 Cow + + To override the name of the resulting column, specify `name`: + + >>> idx.to_frame(index=False, name='zoo') + zoo + 0 Ant + 1 Bear + 2 Cow """ from pandas import DataFrame - name = self.name or 0 + if name is None: + name = self.name or 0 result = DataFrame({name: self.values.copy()}) if index: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4f38f61f7b0e4..a7932f667f6de 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1126,20 +1126,23 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - def to_frame(self, index=True): + def to_frame(self, index=True, name=None): """ Create a DataFrame with the levels of the MultiIndex as columns. Column ordering is determined by the DataFrame constructor with data as a dict. .. versionadded:: 0.20.0 Parameters ---------- index : boolean, default True Set the index of the returned DataFrame as the original MultiIndex. + name : list / sequence of strings, optional + The passed names should substitute index level names. + Returns ------- DataFrame : a DataFrame containing the original MultiIndex data. 
@@ -1150,10 +1153,22 @@ def to_frame(self, index=True): """ from pandas import DataFrame + if name is not None: + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence " + "of column names.") + + if len(name) != len(self.levels): + raise ValueError("'name' should have same length as " + "number of levels on index.") + idx_names = name + else: + idx_names = self.names + result = DataFrame({(name or level): self._get_level_values(level) for name, level in - zip(self.names, range(len(self.levels)))}, + zip(idx_names, range(len(self.levels)))}, copy=False) if index: result.index = self diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 56f59851d6d04..49a247608ab0b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -66,19 +66,24 @@ def test_to_series_with_arguments(self): assert s.index is not idx assert s.name != idx.name - def test_to_frame(self): - # see gh-15230 + @pytest.mark.parametrize("name", [None, "new_name"]) + def test_to_frame(self, name): + # see GH-15230, GH-22580 idx = self.create_index() - name = idx.name or 0 - df = idx.to_frame() + if name: + idx_name = name + else: + idx_name = idx.name or 0 + + df = idx.to_frame(name=idx_name) assert df.index is idx assert len(df.columns) == 1 - assert df.columns[0] == name - assert df[name].values is not idx.values + assert df.columns[0] == idx_name + assert df[idx_name].values is not idx.values - df = idx.to_frame(index=False) + df = idx.to_frame(index=False, name=idx_name) assert df.index is not idx def test_shift(self): diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index fcc22390e17a1..8c9566b7e651f 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -37,6 +37,27 @@ def test_to_frame(): expected.index = index tm.assert_frame_equal(result, expected) + # See GH-22580 + index = 
MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, name=['first', 'second']) + expected = DataFrame(tuples) + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=['first', 'second']) + expected.index = index + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + msg = "'name' must be a list / sequence of column names." + with tm.assert_raises_regex(TypeError, msg): + index.to_frame(name='first') + + msg = "'name' should have same length as number of levels on index." + with tm.assert_raises_regex(ValueError, msg): + index.to_frame(name=['first']) + + # Tests for datetime index index = MultiIndex.from_product([range(5), pd.date_range('20130101', periods=3)]) result = index.to_frame(index=False) @@ -45,12 +66,21 @@ def test_to_frame(): 1: np.tile(pd.date_range('20130101', periods=3), 5)}) tm.assert_frame_equal(result, expected) - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) result = index.to_frame() expected.index = index tm.assert_frame_equal(result, expected) + # See GH-22580 + result = index.to_frame(index=False, name=['first', 'second']) + expected = DataFrame( + {'first': np.repeat(np.arange(5, dtype='int64'), 3), + 'second': np.tile(pd.date_range('20130101', periods=3), 5)}) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=['first', 'second']) + expected.index = index + tm.assert_frame_equal(result, expected) + def test_to_hierarchical(): index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (