From e272d5a1e0fff2fd14ba8bc900d7ccade0cc6f10 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 01:02:38 -0500 Subject: [PATCH 1/7] Update algorithms.py --- pandas/core/algorithms.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6c1dfc4c0da72..0c0b93f41c657 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1181,7 +1181,6 @@ def compute(self, method: str) -> Series: arr = arr[::-1] nbase = n - findex = len(self.obj) narr = len(arr) n = min(n, narr) @@ -1194,6 +1193,11 @@ def compute(self, method: str) -> Series: if self.keep != "all": inds = inds[:n] findex = nbase + else: + if len(inds) < nbase and len(nan_index) + len(inds) >= nbase: + findex = len(nan_index) + len(inds) + else: + findex = len(inds) if self.keep == "last": # reverse indices From a6ae35ca1661adf9f3f9592f6babc9ec2bef5950 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 01:15:19 -0500 Subject: [PATCH 2/7] Update test_nlargest.py --- pandas/tests/frame/methods/test_nlargest.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 1b2db80d782ce..a317dae562ae0 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -216,3 +216,24 @@ def test_nlargest_nan(self): result = df.nlargest(5, 0) expected = df.sort_values(0, ascending=False).head(5) tm.assert_frame_equal(result, expected) + + def test_nsmallest_nan_after_n_element(self): + # GH#46589 + df = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5, None, 7], + "b": [7, 6, 5, 4, 3, 2, 1], + "c": [1, 1, 2, 2, 3, 3, 3], + }, + index=range(7), + ) + result = df.nsmallest(5, columns=["a", "b"]) + expected = pd.DataFrame( + { + "a": [1, 2, 3, 4, 5], + "b": [7, 6, 5, 4, 3], + 
"c": [1, 1, 2, 2, 3], + }, + index=range(5), + ).astype({"a": "float"}) + tm.assert_frame_equal(result, expected) From 7515c5f58397bae609976a8a30cb60321a476f9a Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 01:16:13 -0500 Subject: [PATCH 3/7] Update test_nlargest.py --- pandas/tests/series/methods/test_nlargest.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index ee96ab08ad66c..614cb6b55da83 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -231,3 +231,15 @@ def test_nlargest_nullable(self, any_numeric_ea_dtype): .astype(dtype) ) tm.assert_series_equal(result, expected) + + def test_nsmallest_nan_when_keep_is_all(self): + # GH#46589 + s = Series([1, 2, 3, 3, 3, None]) + result = s.nsmallest(3, keep="all") + expected = Series([1., 2., 3., 3., 3.]) + tm.assert_series_equal(result, expected) + + s = Series([1, 2, None, None, None]) + result = s.nsmallest(3, keep="all") + expected = Series([1, 2, None, None, None]) + tm.assert_series_equal(result, expected) From cda82bcb068a909afd1eeba747ef5f1c64b88157 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 01:23:43 -0500 Subject: [PATCH 4/7] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 73dc832e2007b..52a3b3327f0cd 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -532,6 +532,7 @@ Indexing - Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) - Bug in setting large integer values into :class:`Series` 
with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) +- Bug in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Missing From cabe5e0f1a78910fe61c85dc10c02676bf9d4cb5 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Wed, 6 Apr 2022 01:31:30 -0500 Subject: [PATCH 5/7] fix format --- pandas/tests/series/methods/test_nlargest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py index 614cb6b55da83..4f07257038bc9 100644 --- a/pandas/tests/series/methods/test_nlargest.py +++ b/pandas/tests/series/methods/test_nlargest.py @@ -236,7 +236,7 @@ def test_nsmallest_nan_when_keep_is_all(self): # GH#46589 s = Series([1, 2, 3, 3, 3, None]) result = s.nsmallest(3, keep="all") - expected = Series([1., 2., 3., 3., 3.]) + expected = Series([1.0, 2.0, 3.0, 3.0, 3.0]) tm.assert_series_equal(result, expected) s = Series([1, 2, None, None, None]) From f8006204e3146024af4b370f89f03ada5b4112b0 Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Fri, 8 Apr 2022 10:16:21 -0500 Subject: [PATCH 6/7] Update v1.5.0.rst --- doc/source/whatsnew/v1.5.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 52a3b3327f0cd..73dc832e2007b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -532,7 +532,6 @@ Indexing - Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) - Bug in 
setting large integer values into :class:`Series` with ``float32`` or ``float16`` dtype incorrectly altering these values instead of coercing to ``float64`` dtype (:issue:`45844`) - Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) -- Bug in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Missing From 0a259df67dfe8ab520eb3a247a2353ec9093e07c Mon Sep 17 00:00:00 2001 From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com> Date: Fri, 8 Apr 2022 10:18:02 -0500 Subject: [PATCH 7/7] Update v1.4.3.rst --- doc/source/whatsnew/v1.4.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index d53acc698c3bb..386bcdf23841c 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when the sorting column contains ``np.nan`` values (:issue:`46589`) - .. ---------------------------------------------------------------------------