From 6430336066da84bcfde32f59e7ffdd78fe28529b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Nov 2020 08:01:45 -0800 Subject: [PATCH 1/3] BUG: CategoricalIndex.equals casting incorrectly --- pandas/core/indexes/category.py | 16 ++++++++++------ .../tests/indexes/categorical/test_category.py | 8 ++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 525c41bae8b51..f8747bc63601d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -20,7 +20,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna from pandas.core import accessor from pandas.core.arrays.categorical import Categorical, contains @@ -263,6 +263,7 @@ def _is_dtype_compat(self, other) -> Categorical: values = other if not is_list_like(values): values = [values] + cat = Categorical(other, dtype=self.dtype) other = CategoricalIndex(cat) if not other.isin(values).all(): @@ -271,6 +272,12 @@ def _is_dtype_compat(self, other) -> Categorical: ) other = other._values + if not ((other == values) | (isna(other) & isna(values))).all(): + # GH#???? see test_equals_non_category + raise TypeError( + "categories must match existing categories when appending" + ) + return other def equals(self, other: object) -> bool: @@ -291,13 +298,10 @@ def equals(self, other: object) -> bool: try: other = self._is_dtype_compat(other) - if isinstance(other, type(self)): - other = other._data - return self._data.equals(other) except (TypeError, ValueError): - pass + return False - return False + return self._data.equals(other) # -------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index cf2430d041d88..9685aef0f957d 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -444,6 +444,14 @@ def test_equals_categorical_unordered(self): assert not a.equals(c) assert not b.equals(c) + def test_equals_non_category(self): + # Case where other contains a value not among ci's categories ("D") and + # also contains np.nan + ci = CategoricalIndex(["A", "B", np.nan, np.nan]) + other = Index(["A", "B", "D", np.nan]) + + assert not ci.equals(other) + def test_frame_repr(self): df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) result = repr(df) From c407131486b782bfe03069a610fea50593c4f47e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Nov 2020 08:02:44 -0800 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 690e6b8f725ad..e5dc7a9c7267c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -380,7 +380,7 @@ Categorical ^^^^^^^^^^^ - :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`) - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`) -- +- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`???`) Datetimelike ^^^^^^^^^^^^ From 11a492ad2fa6e13e35638a669cfc8a2019ed9367 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 6 Nov 2020 08:05:05 -0800 Subject: [PATCH 3/3] GH ref --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/categorical/test_category.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e5dc7a9c7267c..73493bbeb0eac 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -380,7 +380,7 @@ Categorical ^^^^^^^^^^^ - :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`) - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`) -- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`???`) +- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f8747bc63601d..085d2f5e832a2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -273,7 +273,7 @@ def _is_dtype_compat(self, other) -> Categorical: other = other._values if not ((other == values) | (isna(other) & isna(values))).all(): - # GH#???? see test_equals_non_category + # GH#37667 see test_equals_non_category raise TypeError( "categories must match existing categories when appending" ) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 9685aef0f957d..324a2535bc465 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -445,8 +445,8 @@ def test_equals_categorical_unordered(self): assert not b.equals(c) def test_equals_non_category(self): - # Case where other contains a value not among ci's categories ("D") and - # also contains np.nan + # GH#37667 Case where other contains a value not among ci's + # categories ("D") and also contains np.nan ci = CategoricalIndex(["A", "B", np.nan, np.nan]) other = Index(["A", "B", "D", np.nan])