From 64b0ea148cd2a3515708c8841bc9ff4d92d6b4ed Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Tue, 31 Dec 2019 20:51:37 +0800 Subject: [PATCH 1/2] BUG: Series setitem with bool indexer incorrect result (GH30567) Series.__setitem__ gives wrong result with bool indexer and when length of new data matches the number of Trues and new data is neither a Series nor a numpy array --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/internals/blocks.py | 15 +++++++-------- pandas/tests/indexing/test_indexing.py | 10 ++++++++++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 570ce11238327..7bba6feb1c2e8 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -858,6 +858,7 @@ Indexing - Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) - :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`) - Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) +- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) Missing ^^^^^^^ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 664f6ea75a3be..15fc30030ef2f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -944,15 +944,14 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) and np.any(mask[mask]) and getattr(new, "ndim", 1) == 1 ): - - if not ( - mask.shape[-1] == len(new) - or mask[mask].shape[-1] == len(new) - or len(new) == 1 - ): + if mask[mask].shape[-1] == len(new): # GH 30567 + np.place(new_values, mask, new) + 
elif mask.shape[-1] == len(new) or len(new) == 1: + np.putmask(new_values, mask, new) + else: raise ValueError("cannot assign mismatch length to masked array") - - np.putmask(new_values, mask, new) + else: + np.putmask(new_values, mask, new) # maybe upcast me elif mask.any(): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d75afd1540f22..ea003a72490f9 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1190,3 +1190,13 @@ def test_duplicate_index_mistyped_key_raises_keyerror(): with pytest.raises(KeyError): ser.index._engine.get_loc(None) + + +def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): + # GH 30567 + ser = pd.Series([None] * 10) + mask = [False] * 3 + [True] * 5 + [False] * 2 + ser[mask] = range(5) + result = ser + expected = pd.Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") + tm.assert_series_equal(result, expected) From b2aa9fbf6bd82916ab9d54b0e7f58832b92ce36d Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 1 Jan 2020 16:54:45 +0800 Subject: [PATCH 2/2] CLN: added comment to explain the issue (GH30567) --- pandas/core/internals/blocks.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 15fc30030ef2f..e47783221ff5d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -944,7 +944,13 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) and np.any(mask[mask]) and getattr(new, "ndim", 1) == 1 ): - if mask[mask].shape[-1] == len(new): # GH 30567 + if mask[mask].shape[-1] == len(new): + # GH 30567 + # If length of ``new`` is less than the length of ``new_values``, + # `np.putmask` would first repeat the ``new`` array and then + # assign the masked values hence produces incorrect result. 
+ # `np.place` on the other hand uses the ``new`` values as they are + # to place in the masked locations of ``new_values`` np.place(new_values, mask, new) elif mask.shape[-1] == len(new) or len(new) == 1: np.putmask(new_values, mask, new)