From 8f9637c49015c74e18826c23a5af21e8ebf7a0bb Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 14 Jun 2019 22:42:42 +0800 Subject: [PATCH 01/11] Add test to ensure that nargsort can handle EA --- pandas/tests/extension/base/methods.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 1852edaa9e748..054a38cd25336 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -44,6 +44,15 @@ def test_argsort_missing(self, data_missing_for_sorting): expected = pd.Series(np.array([1, -1, 0], dtype=np.int64)) self.assert_series_equal(result, expected) + @pytest.mark.parametrize('na_position, expected', [ + ('last', np.array([2, 0, 1])), + ('first', np.array([1, 2, 0])) + ]) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + from pandas.core.sorting import nargsort + result = nargsort(data_missing_for_sorting, na_position=na_position) + tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize('ascending', [True, False]) def test_sort_values(self, data_for_sorting, ascending): ser = pd.Series(data_for_sorting) From 44601912a8467da0d46e8b9b06504f1b04b577ac Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 14 Jun 2019 22:43:09 +0800 Subject: [PATCH 02/11] Modify nargsort --- pandas/core/sorting.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 21c0c8f747b10..6efc5ca7447f1 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -9,8 +9,10 @@ from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, - is_extension_array_dtype, is_list_like) + is_extension_array_dtype, is_list_like, is_sparse) from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.generic import ABCIndexClass + import pandas.core.algorithms as algorithms @@ -239,12 +241,12 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): GH #6399, #5231 """ + mask = isna(items) # specially handle Categorical if is_categorical_dtype(items): if na_position not in {'first', 'last'}: raise ValueError('invalid na_position: {!r}'.format(na_position)) - mask = isna(items) cnt_null = mask.sum() sorted_idx = items.argsort(ascending=ascending, kind=kind) if ascending and na_position == 'last': @@ -255,15 +257,19 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): sorted_idx = np.roll(sorted_idx, cnt_null) return sorted_idx - with warnings.catch_warnings(): - # https://github.com/pandas-dev/pandas/issues/25439 - # can be removed once ExtensionArrays are properly handled by nargsort - warnings.filterwarnings( - "ignore", category=FutureWarning, - message="Converting timezone-aware DatetimeArray to") + if (not isinstance(items, ABCIndexClass) + and is_extension_array_dtype(items)): + + if is_sparse(items): + # The conversion to np.ndarray is the fact that + # SparseArray.isna() is also a SparseArray + mask = np.array(isna(items)) + + items = items._values_for_argsort() + else: items = np.asanyarray(items) + idx = np.arange(len(items)) - mask = isna(items) non_nans = items[~mask] non_nan_idx = idx[~mask] nan_idx = np.nonzero(mask)[0] From 2f37a6343c1b6fe1ec116fd76a58ea5987ef1314 Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 14 Jun 2019 22:55:23 +0800 Subject: [PATCH 03/11] Remove test_nargsort_datetimearray_warning --- pandas/tests/test_sorting.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 75fa37eb9af09..fd7819d15546c 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -181,13 +181,6 @@ def test_nargsort(self): exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - def test_nargsort_datetimearray_warning(self): - # https://github.com/pandas-dev/pandas/issues/25439 - # can be removed once the FutureWarning for np.array(DTA) is removed - data = to_datetime([0, 2, 0, 1]).tz_localize('Europe/Brussels') - with tm.assert_produces_warning(None): - nargsort(data) - class TestMerge: From 707d9e369bb92b80e771bdcc965a48329b478bda Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 14 Jun 2019 23:23:11 +0800 Subject: [PATCH 04/11] Add whatsnew entry --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 77b689569d57f..0e621cff74240 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -133,6 +133,7 @@ Other Enhancements - :meth:`DataFrame.describe` now formats integer percentiles without decimal point (:issue:`26660`) - Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) - Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '')`` where `` Date: Fri, 14 Jun 2019 23:24:20 +0800 Subject: [PATCH 05/11] Fix lint error --- pandas/core/sorting.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 6efc5ca7447f1..5a78bf65fcc3b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -1,6 +1,4 @@ """ miscellaneous sorting / groupby utilities """ -import warnings - import numpy as np from pandas._libs import algos, hashtable, lib @@ -10,9 +8,8 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, is_extension_array_dtype, is_list_like, is_sparse) -from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCIndexClass - +from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms From 42d7092861197201105772b87906e3663a0db9e0 Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 15 Jun 2019 10:08:23 +0800 Subject: [PATCH 06/11] Change after 1st review --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/extension/base/methods.py | 5 +++-- pandas/tests/test_sorting.py | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0e621cff74240..25150ac644734 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -133,7 +133,7 @@ Other Enhancements - :meth:`DataFrame.describe` now formats integer percentiles without decimal point (:issue:`26660`) - Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) - Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '')`` where `` Date: Sat, 15 Jun 2019 10:13:09 +0800 Subject: [PATCH 07/11] Correct comment --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5a78bf65fcc3b..0050ef0af86b3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -258,7 +258,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): and is_extension_array_dtype(items)): if is_sparse(items): - # The conversion to np.ndarray is the fact that + # The conversion to np.ndarray is due to the fact that # SparseArray.isna() is also a SparseArray mask = np.array(isna(items)) From 7543a84448798cbbdf3f1df3fd4e0496f3a46135 Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 21 Jun 2019 22:34:33 +0800 Subject: [PATCH 08/11] change after 2nd review --- pandas/core/sorting.py | 7 +------ pandas/tests/extension/base/methods.py | 6 +++--- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 0050ef0af86b3..dc0e337dbf94f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -238,7 +238,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): GH #6399, #5231 """ - mask = isna(items) + mask = np.asarray(isna(items)) # specially handle Categorical if is_categorical_dtype(items): if na_position not in {'first', 'last'}: @@ -257,11 +257,6 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): if (not isinstance(items, ABCIndexClass) and is_extension_array_dtype(items)): - if is_sparse(items): - # The conversion to np.ndarray is due to the fact that - # SparseArray.isna() is also a SparseArray - mask = np.array(isna(items)) - items = items._values_for_argsort() else: items = np.asanyarray(items) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 4f3b1dc06b215..5ec26d19c7758 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -46,13 +46,13 @@ def test_argsort_missing(self, data_missing_for_sorting): self.assert_series_equal(result, expected) @pytest.mark.parametrize('na_position, expected', [ - ('last', np.array([2, 0, 1])), - ('first', np.array([1, 2, 0])) + ('last', np.array([2, 0, 1], dtype='int64')), + ('first', np.array([1, 2, 0], dtype='int64')) ]) def test_nargsort(self, data_missing_for_sorting, na_position, expected): # GH 25439 result = nargsort(data_missing_for_sorting, na_position=na_position) - tm.assert_numpy_array_equal(result, expected, check_dtype=False) + tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize('ascending', [True, False]) def test_sort_values(self, data_for_sorting, ascending): From a65fea3bf09d657b760c0827a84dbd802e98603d Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 21 Jun 2019 23:39:03 +0800 Subject: [PATCH 09/11] Fix lint --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index dc0e337dbf94f..0f5d6da300f6f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -7,7 +7,7 @@ from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, - is_extension_array_dtype, is_list_like, is_sparse) + is_extension_array_dtype, is_list_like) from pandas.core.dtypes.generic import ABCIndexClass from pandas.core.dtypes.missing import isna From 365afef4ea700cad9f7cff864a8e5d2a0daa9943 Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 22 Jun 2019 22:11:46 +0800 Subject: [PATCH 10/11] Remove whatsnew entry --- doc/source/whatsnew/v0.25.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 25150ac644734..77b689569d57f 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -133,7 +133,6 @@ Other Enhancements - :meth:`DataFrame.describe` now formats integer percentiles without decimal point (:issue:`26660`) - Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) - Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '')`` where `` Date: Sun, 23 Jun 2019 11:02:26 +0800 Subject: [PATCH 11/11] Use extract_array ahead --- pandas/core/sorting.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 0f5d6da300f6f..750a4c903176f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, is_extension_array_dtype, is_list_like) -from pandas.core.dtypes.generic import ABCIndexClass from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms @@ -237,7 +236,9 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): handles NaNs. It adds ascending and na_position parameters. GH #6399, #5231 """ + from pandas.core.internals.arrays import extract_array + items = extract_array(items) mask = np.asarray(isna(items)) # specially handle Categorical if is_categorical_dtype(items): @@ -254,9 +255,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): sorted_idx = np.roll(sorted_idx, cnt_null) return sorted_idx - if (not isinstance(items, ABCIndexClass) - and is_extension_array_dtype(items)): - + if is_extension_array_dtype(items): items = items._values_for_argsort() else: items = np.asanyarray(items)