diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index b5d0532c6dfa3..18edc290768c8 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1437,6 +1437,7 @@ Sparse - Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`) - Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. The performance of this has also been improved (:issue:`22835`) - Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`) +- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 0e5a8280cc467..619cd05128ddb 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -784,6 +784,23 @@ def test_fillna_overlap(self): exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64) tm.assert_sp_array_equal(res, exp) + def test_nonzero(self): + # Tests regression #21172. 
+ sa = pd.SparseArray([ + float('nan'), + float('nan'), + 1, 0, 0, + 2, 0, 0, 0, + 3, 0, 0 + ]) + expected = np.array([2, 5, 9], dtype=np.int32) + result, = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + result, = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + class TestSparseArrayAnalytics(object): diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index dd73ec69c3b9a..f802598542cb9 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -1360,3 +1360,16 @@ def test_assign_with_sparse_frame(self): for column in res.columns: assert type(res[column]) is SparseSeries + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("how", ["all", "any"]) + def test_dropna(self, inplace, how): + # Tests regression #21172. + expected = pd.SparseDataFrame({"F2": [0, 1]}) + input_df = pd.SparseDataFrame( + {"F1": [float('nan'), float('nan')], "F2": [0, 1]} + ) + result_df = input_df.dropna(axis=1, inplace=inplace, how=how) + if inplace: + result_df = input_df + tm.assert_sp_frame_equal(expected, result_df)