From 3c97a597d4f44a9b9b0354c789987cb1db66e5e2 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 3 Jan 2023 16:05:01 +0100 Subject: [PATCH 1/4] add tests --- pandas/tests/frame/test_constructors.py | 11 +++++++++++ pandas/tests/series/test_constructors.py | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 25f82eb7ff4b3..1f0f03bf64bdb 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2108,6 +2108,17 @@ def test_constructor_frame_copy(self, float_frame): assert (cop["A"] == 5).all() assert not (float_frame["A"] == 5).all() + def test_constructor_frame_shallow_copy(self, float_frame): + # constructing a DataFrame from DataFrame with copy=False should still + # give a "shallow" copy (share data, not attributes) + # https://github.com/pandas-dev/pandas/issues/49523 + orig = float_frame.copy() + cop = DataFrame(float_frame) + assert cop._mgr is not float_frame._mgr + # Overwriting index of copy doesn't change original + cop.index = np.arange(len(cop)) + tm.assert_frame_equal(float_frame, orig) + def test_constructor_ndarray_copy(self, float_frame, using_array_manager): if not using_array_manager: df = DataFrame(float_frame.values) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f856f18552594..cb605d1c8f96d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -719,6 +719,18 @@ def test_constructor_limit_copies(self, index): # we make 1 copy; this is just a smoke test here assert s._mgr.blocks[0].values is not index + def test_constructor_shallow_copy(self): + # constructing a Series from Series with copy=False should still + # give a "shallow" copy (share data, not attributes) + # https://github.com/pandas-dev/pandas/issues/49523 + s = Series([1, 2, 3]) + s_orig = s.copy() + s2 = Series(s) + assert s2._mgr is not s._mgr + # Overwriting index of s2 doesn't change s + s2.index = ["a", "b", "c"] + tm.assert_series_equal(s, s_orig) + def test_constructor_pass_none(self): s = Series(None, index=range(5)) assert s.dtype == np.float64 From 1b7de43064f9e3b6a897a478fdef251f3d93f565 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 3 Jan 2023 16:23:00 +0100 Subject: [PATCH 2/4] fix implementation and add whatsnew --- doc/source/whatsnew/v2.0.0.rst | 5 +++++ pandas/core/frame.py | 2 ++ pandas/core/series.py | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 22f6659367683..d931a804cf7d7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -496,6 +496,11 @@ Other API changes or :attr:`~DataFrame.iloc` (thus, ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a new DataFrame (shallow copy) instead of the original DataFrame, consistent with other methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) +- The :class:`Series` and :class:`DataFrame` constructors will now return a shallow copy + (i.e. share data, but not attributes) when passed a Series and DataFrame, respectively, + and with the default of ``copy=False`` (and if no other triggers a copy). Previously, + the new Series or DataFrame would share the index attribute (e.g. ``df.index = ...`` + would also update the index of the parent or child) (:issue:`49523`) - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d916fdc5a0ee4..8fe4a1234647b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -637,6 +637,8 @@ def __init__( if isinstance(data, DataFrame): data = data._mgr + if not copy: + data = data.copy(deep=False) if isinstance(data, (BlockManager, ArrayManager)): # first check if a Manager is passed without any other arguments diff --git a/pandas/core/series.py b/pandas/core/series.py index b5d73373f061e..6216cd43e76a5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -420,10 +420,11 @@ def __init__( elif isinstance(data, Series): if index is None: index = data.index + data = data._mgr.copy(deep=False) else: data = data.reindex(index, copy=copy) copy = False - data = data._mgr + data = data._mgr elif is_dict_like(data): data, index = self._init_dict(data, index, dtype) dtype = None From c9ca480c4c9c63b8f49666240dac0fbb6258e2cb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 23 Jan 2023 15:51:46 +0100 Subject: [PATCH 3/4] update note and add comment --- doc/source/whatsnew/v2.0.0.rst | 8 ++++---- pandas/core/frame.py | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index ed1467c39abc6..ca5168f5ea4ef 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -600,10 +600,10 @@ Other API changes new DataFrame (shallow copy) instead of the original DataFrame, consistent with other methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`) - The :class:`Series` and :class:`DataFrame` constructors will now return a shallow copy - (i.e. share data, but not attributes) when passed a Series and DataFrame, respectively, - and with the default of ``copy=False`` (and if no other triggers a copy). Previously, - the new Series or DataFrame would share the index attribute (e.g. ``df.index = ...`` - would also update the index of the parent or child) (:issue:`49523`) + (i.e. share data, but not attributes) when passed a Series and DataFrame, + respectively, and with the default of ``copy=False`` (and if no other keyword triggers + a copy). Previously, the new Series or DataFrame would share the index attribute (e.g. + ``df.index = ...`` would also update the index of the parent or child) (:issue:`49523`) - Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`) - Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`) - :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d9aafd3b98913..f1f42e7b2015f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -642,6 +642,8 @@ def __init__( if isinstance(data, DataFrame): data = data._mgr if not copy: + # if not copying data, ensure to still return a shallow copy + # to avoid the result sharing the same Manager data = data.copy(deep=False) if isinstance(data, (BlockManager, ArrayManager)): From d355032c7db1cc01dcbe83018a3329257c232193 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 10 Feb 2023 16:30:26 +0100 Subject: [PATCH 4/4] fixup CoW test --- pandas/tests/frame/test_constructors.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d171544403377..d4d2164f2a4a7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -285,10 +285,8 @@ def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) if using_copy_on_write: - # TODO(CoW) doesn't mutate original should_be_view.iloc[0, 0] = 99 - # assert df.values[0, 0] == 1 - assert df.values[0, 0] == 99 + assert df.values[0, 0] == 1 else: should_be_view[0][0] = 99 assert df.values[0, 0] == 99