From 823d8902013f99a969aff139b76f25a3c9a1aef9 Mon Sep 17 00:00:00 2001 From: Ana Rosa Date: Mon, 21 Apr 2025 21:27:11 +0100 Subject: [PATCH 1/2] Fix alignment in Series subtraction with MultiIndex and NaN (GH#60908) Fixes an issue where subtracting a Series with a MultiIndex containing NaN values from a regular Index Series led to incorrect results. Updated _align_for_op to properly reindex right-hand Series based on the first level of the MultiIndex and handle empty Series cases. Added test_subtraction_nanindex to ensure correct behavior. --- pandas/core/series.py | 50 ++++++++++--------- .../tests/series/test_subtraction_nanindex.py | 38 ++++++++++++++ 2 files changed, 65 insertions(+), 23 deletions(-) create mode 100644 pandas/tests/series/test_subtraction_nanindex.py diff --git a/pandas/core/series.py b/pandas/core/series.py index d6a982c65e9fd..589cf9662d81b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5890,32 +5890,36 @@ def _arith_method(self, other, op): self, other = self._align_for_op(other) return base.IndexOpsMixin._arith_method(self, other, op) - def _align_for_op(self, right, align_asobject: bool = False): - """align lhs and rhs Series""" - # TODO: Different from DataFrame._align_for_op, list, tuple and ndarray - # are not coerced here - # because Series has inconsistencies described in GH#13637 + def _align_for_op(self, right, align_asobject=False, fill_value=np.nan): + """Align lhs and rhs Series for arithmetic operations""" + left = self - if isinstance(right, Series): - # avoid repeated alignment - if not left.index.equals(right.index): - if align_asobject: - if left.dtype not in (object, np.bool_) or right.dtype not in ( - object, - np.bool_, - ): - pass - # GH#52538 no longer cast in these cases - else: - # to keep original value's dtype for bool ops - left = left.astype(object) - right = right.astype(object) - - left, right = left.align(right) - - return left, right + if not isinstance(right, Series): + return left, right + + if left.index.equals(right.index): + return left, right + if not (hasattr(left.index, "levels") or hasattr(right.index, "levels")): + if align_asobject: + if left.empty or right.empty: + if left.dtype not in (object, np.bool_) or right.dtype not in (object, np.bool_): + return left.iloc[0:0], right.iloc[0:0] + return left.align(right, join='outer', fill_value=fill_value) + + if hasattr(left.index, "levels") and not hasattr(right.index, "levels"): + if left.empty or right.empty: + return left.iloc[0:0], right.iloc[0:0] + else: + first_level = left.index.get_level_values(0) + left = left.astype(object) + right = right.astype(object) + right_aligned = right.reindex(first_level, fill_value=fill_value) + return left, right_aligned + + return left.align(right, join='outer', fill_value=fill_value) + def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: """ Perform generic binary operation with optional fill value. diff --git a/pandas/tests/series/test_subtraction_nanindex.py b/pandas/tests/series/test_subtraction_nanindex.py new file mode 100644 index 0000000000000..a97aec4ede490 --- /dev/null +++ b/pandas/tests/series/test_subtraction_nanindex.py @@ -0,0 +1,38 @@ +import pytest +import pandas as pd +import numpy as np +import pandas.testing as tm + +def test_series_subtraction_with_nan_and_levels(): + ix1 = pd.MultiIndex.from_arrays( + [ + [np.nan, 81, 81, 82, 82], + [np.nan] * 5, + pd.to_datetime([np.nan, '2018-06-01', '2018-07-01', '2018-07-01', '2018-08-01']) + ], + names=['foo', 'bar', 'date'] + ) + + s1 = pd.Series( + [np.nan, 25.058969, 22.519751, 20.847981, 21.625236], + index=ix1 + ) + + ix2 = pd.Index([81, 82, 83, 84, 85, 86, 87], name='foo') + s2 = pd.Series( + [28.2800, 25.2500, 22.2200, 16.7660, 14.0087, 14.9480, 29.2900], + index=ix2 + ) + + expected = pd.Series( + [np.nan, -3.221031, -5.760249, -4.402019, -3.624764], + index=ix1, + dtype='float64' + ) + + result = s1 - s2 + + result = result.astype('float64') + + tm.assert_series_equal(result, expected) + From 84b38aa7afe805dad3525d4257094191121363ed Mon Sep 17 00:00:00 2001 From: Ana Rita Rosa Date: Sun, 11 May 2025 23:43:47 +0100 Subject: [PATCH 2/2] Fix E501: format long lines in series.py and test_subtraction_nanindex.py --- pandas/core/series.py | 11 +++++--- .../tests/series/test_subtraction_nanindex.py | 28 ++++++++----------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 589cf9662d81b..4ef94dec3ec8e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5904,9 +5904,12 @@ def _align_for_op(self, right, align_asobject=False, fill_value=np.nan): if not (hasattr(left.index, "levels") or hasattr(right.index, "levels")): if align_asobject: if left.empty or right.empty: - if left.dtype not in (object, np.bool_) or right.dtype not in (object, np.bool_): + if left.dtype not in (object, np.bool_) or right.dtype not in ( + object, + np.bool_, + ): return left.iloc[0:0], right.iloc[0:0] - return left.align(right, join='outer', fill_value=fill_value) + return left.align(right, join="outer", fill_value=fill_value) if hasattr(left.index, "levels") and not hasattr(right.index, "levels"): if left.empty or right.empty: @@ -5918,8 +5921,8 @@ def _align_for_op(self, right, align_asobject=False, fill_value=np.nan): right_aligned = right.reindex(first_level, fill_value=fill_value) return left, right_aligned - return left.align(right, join='outer', fill_value=fill_value) - + return left.align(right, join="outer", fill_value=fill_value) + def _binop(self, other: Series, func, level=None, fill_value=None) -> Series: """ Perform generic binary operation with optional fill value. diff --git a/pandas/tests/series/test_subtraction_nanindex.py b/pandas/tests/series/test_subtraction_nanindex.py index a97aec4ede490..e7284703eb59f 100644 --- a/pandas/tests/series/test_subtraction_nanindex.py +++ b/pandas/tests/series/test_subtraction_nanindex.py @@ -1,38 +1,34 @@ -import pytest -import pandas as pd import numpy as np + +import pandas as pd import pandas.testing as tm + def test_series_subtraction_with_nan_and_levels(): ix1 = pd.MultiIndex.from_arrays( [ [np.nan, 81, 81, 82, 82], [np.nan] * 5, - pd.to_datetime([np.nan, '2018-06-01', '2018-07-01', '2018-07-01', '2018-08-01']) + pd.to_datetime( + [np.nan, "2018-06-01", "2018-07-01", "2018-07-01", "2018-08-01"] + ), ], - names=['foo', 'bar', 'date'] + names=["foo", "bar", "date"], ) - s1 = pd.Series( - [np.nan, 25.058969, 22.519751, 20.847981, 21.625236], - index=ix1 - ) + s1 = pd.Series([np.nan, 25.058969, 22.519751, 20.847981, 21.625236], index=ix1) - ix2 = pd.Index([81, 82, 83, 84, 85, 86, 87], name='foo') + ix2 = pd.Index([81, 82, 83, 84, 85, 86, 87], name="foo") s2 = pd.Series( - [28.2800, 25.2500, 22.2200, 16.7660, 14.0087, 14.9480, 29.2900], - index=ix2 + [28.2800, 25.2500, 22.2200, 16.7660, 14.0087, 14.9480, 29.2900], index=ix2 ) expected = pd.Series( - [np.nan, -3.221031, -5.760249, -4.402019, -3.624764], - index=ix1, - dtype='float64' + [np.nan, -3.221031, -5.760249, -4.402019, -3.624764], index=ix1, dtype="float64" ) result = s1 - s2 - result = result.astype('float64') + result = result.astype("float64") tm.assert_series_equal(result, expected) -