From 665362278afbe48e5b5086b87d5a291c2c5a4356 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 6 Dec 2021 12:34:28 -0800 Subject: [PATCH 1/2] ENH: support np.add.reduce, np.multiply.reduce --- pandas/conftest.py | 12 ++ pandas/core/arraylike.py | 2 + pandas/core/arrays/masked.py | 8 +- pandas/tests/arithmetic/conftest.py | 15 -- pandas/tests/arrays/boolean/test_function.py | 12 +- pandas/tests/arrays/floating/test_function.py | 9 +- pandas/tests/arrays/integer/test_function.py | 9 +- pandas/tests/series/test_ufunc.py | 143 ++++++++++++++---- 8 files changed, 148 insertions(+), 62 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index eb9a952250f36..f40af9feb3833 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -387,6 +387,18 @@ def index_or_series_or_array(request): return request.param +@pytest.fixture(params=[Index, Series, DataFrame, pd.array], ids=lambda x: x.__name__) +def box_with_array(request): + """ + Fixture to test behavior for Index, Series, DataFrame, and pandas Array + classes + """ + return request.param + + +box_with_array2 = box_with_array + + @pytest.fixture def dict_subclass(): """ diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index c496099e3a8d2..96ec853552646 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -25,6 +25,8 @@ REDUCTION_ALIASES = { "maximum": "max", "minimum": "min", + "add": "sum", + "multiply": "prod", } diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ef0b407eebd9e..1818eeec751e8 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -419,9 +419,6 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): # For MaskedArray inputs, we apply the ufunc to ._data # and mask the result. - if method == "reduce" and ufunc not in [np.maximum, np.minimum]: - # Not clear how to handle missing values in reductions. Raise. 
- raise NotImplementedError("The 'reduce' method is not supported.") out = kwargs.get("out", ()) @@ -481,6 +478,11 @@ def reconstruct(x): result = getattr(ufunc, method)(*inputs2, **kwargs) if isinstance(result, tuple): return tuple(reconstruct(x) for x in result) + elif method == "reduce": + # e.g. np.add.reduce; test_ufunc_reduce_raises + if self._mask.any(): + return self._na_value + return result else: return reconstruct(result) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 55cbfaf76d5a7..c163f3de86c12 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -216,17 +216,6 @@ def mismatched_freq(request): # ------------------------------------------------------------------ -@pytest.fixture( - params=[pd.Index, pd.Series, pd.DataFrame, pd.array], ids=lambda x: x.__name__ -) -def box_with_array(request): - """ - Fixture to test behavior for Index, Series, DataFrame, and pandas Array - classes - """ - return request.param - - @pytest.fixture( params=[pd.Index, pd.Series, tm.to_array, np.array, list], ids=lambda x: x.__name__ ) @@ -236,7 +225,3 @@ def box_1d_array(request): classes """ return request.param - - -# alias so we can use the same fixture for multiple parameters in a test -box_with_array2 = box_with_array diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index db362afc80087..78992f3124779 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -79,10 +79,14 @@ def test_ufunc_numeric(): @pytest.mark.parametrize("values", [[True, False], [True, None]]) def test_ufunc_reduce_raises(values): - a = pd.array(values, dtype="boolean") - msg = "The 'reduce' method is not supported" - with pytest.raises(NotImplementedError, match=msg): - np.add.reduce(a) + arr = pd.array(values, dtype="boolean") + + res = np.add.reduce(arr) + if arr[-1] is pd.NA: + expected = pd.NA + else: 
+ expected = arr._data.sum() + tm.assert_almost_equal(res, expected) def test_value_counts_na(): diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index ff84116fa1b18..3fe869280dc2c 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -67,10 +67,11 @@ def test_ufuncs_binary_float(ufunc): @pytest.mark.parametrize("values", [[0, 1], [0, None]]) def test_ufunc_reduce_raises(values): - a = pd.array(values, dtype="Float64") - msg = r"The 'reduce' method is not supported." - with pytest.raises(NotImplementedError, match=msg): - np.add.reduce(a) + arr = pd.array(values, dtype="Float64") + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) @pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system") diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 596b78f8bbe77..96fe1e77f6bc5 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -79,10 +79,11 @@ def test_ufunc_binary_output(): @pytest.mark.parametrize("values", [[0, 1], [0, None]]) def test_ufunc_reduce_raises(values): - a = pd.array(values) - msg = r"The 'reduce' method is not supported." 
- with pytest.raises(NotImplementedError, match=msg): - np.add.reduce(a) + arr = pd.array(values) + + res = np.add.reduce(arr) + expected = arr.sum(skipna=False) + tm.assert_almost_equal(res, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index f4d7d41dbee04..e5a45fb813824 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -252,9 +252,8 @@ def __add__(self, other): tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1)))) -@pytest.mark.parametrize( - "values", - [ +@pytest.fixture( + params=[ pd.array([1, 3, 2], dtype=np.int64), pd.array([1, 3, 2], dtype="Int64"), pd.array([1, 3, 2], dtype="Float32"), @@ -267,41 +266,121 @@ def __add__(self, other): ], ids=lambda x: str(x.dtype), ) -@pytest.mark.parametrize("box", [pd.array, pd.Index, pd.Series, pd.DataFrame]) -def test_reduce(values, box, request): - # TODO: cases with NAs +def values_for_np_reduce(request): + # min/max tests assume that these are monotonic increasing + return request.param - same_type = True - if box is pd.Index: - if values.dtype.kind in ["i", "f"]: +class TestNumpyReductions: + # TODO: cases with NAs, axis kwarg for DataFrame + + def test_multiply(self, values_for_np_reduce, box_with_array, request): + box = box_with_array + values = values_for_np_reduce + + obj = box(values) + + if isinstance(values, pd.core.arrays.SparseArray) and box is not pd.Index: + mark = pytest.mark.xfail(reason="SparseArray has no 'mul'") + request.node.add_marker(mark) + + if values.dtype.kind in "iuf": + result = np.multiply.reduce(obj) + if box is pd.DataFrame: + expected = obj.prod(numeric_only=False) + tm.assert_series_equal(result, expected) + elif box is pd.Index: + # Int64Index, Index has no 'prod' + expected = obj._values.prod() + assert result == expected + else: + + expected = obj.prod() + assert result == expected + else: + msg = "|".join( + [ + "does not support reduction", + 
"unsupported operand type", + "ufunc 'multiply' cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + np.multiply.reduce(obj) + + def test_add(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + obj = box(values) + + if values.dtype.kind in "miuf": + result = np.add.reduce(obj) + if box is pd.DataFrame: + expected = obj.sum(numeric_only=False) + tm.assert_series_equal(result, expected) + elif box is pd.Index: + # Int64Index, Index has no 'sum' + expected = obj._values.sum() + assert result == expected + else: + expected = obj.sum() + assert result == expected + else: + msg = "|".join( + [ + "does not support reduction", + "unsupported operand type", + "ufunc 'add' cannot use operands", + ] + ) + with pytest.raises(TypeError, match=msg): + np.add.reduce(obj) + + def test_max(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + same_type = True + if box is pd.Index and values.dtype.kind in ["i", "f"]: # ATM Index casts to object, so we get python ints/floats same_type = False - obj = box(values) + obj = box(values) - result = np.maximum.reduce(obj) - expected = values[1] - if box is pd.DataFrame: - # TODO: cases with axis kwarg - expected = obj.max(numeric_only=False) - tm.assert_series_equal(result, expected) - else: - assert result == expected - if same_type: - # check we have e.g. Timestamp instead of dt64 - assert type(result) == type(expected) - - result = np.minimum.reduce(obj) - expected = values[0] - if box is pd.DataFrame: - expected = obj.min(numeric_only=False) - tm.assert_series_equal(result, expected) - else: - assert result == expected - if same_type: - # check we have e.g. 
Timestamp instead of dt64 - assert type(result) == type(expected) + result = np.maximum.reduce(obj) + if box is pd.DataFrame: + # TODO: cases with axis kwarg + expected = obj.max(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + expected = values[1] + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) + + def test_min(self, values_for_np_reduce, box_with_array): + box = box_with_array + values = values_for_np_reduce + + same_type = True + if box is pd.Index and values.dtype.kind in ["i", "f"]: + # ATM Index casts to object, so we get python ints/floats + same_type = False + + obj = box(values) + + result = np.minimum.reduce(obj) + if box is pd.DataFrame: + expected = obj.min(numeric_only=False) + tm.assert_series_equal(result, expected) + else: + expected = values[0] + assert result == expected + if same_type: + # check we have e.g. Timestamp instead of dt64 + assert type(result) == type(expected) @pytest.mark.parametrize("type_", [list, deque, tuple]) From 106812f1bb3c599006009235cc77808d58320b94 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 28 Dec 2021 15:11:17 -0800 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1c1415255bf89..b230c009472d7 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -928,7 +928,7 @@ ExtensionArray ^^^^^^^^^^^^^^ - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`) - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. 
In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`, :issue:`23316`) -- NumPy ufuncs ``np.minimum.reduce`` and ``np.maximum.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`) +- NumPy ufuncs ``np.minimum.reduce``, ``np.maximum.reduce``, ``np.add.reduce``, and ``np.multiply.reduce`` now work correctly instead of raising ``NotImplementedError`` on :class:`Series` with ``IntegerDtype`` or ``FloatDtype`` (:issue:`43923`, :issue:`44793`) - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) - Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`) - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)