From a07d5fc6d813d2ce7e00c039667328014e4bc7a0 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 6 Jun 2018 17:21:29 +0100 Subject: [PATCH] Add __contains__ to CategoricalIndex --- asv_bench/benchmarks/categoricals.py | 13 +++++++++++++ doc/source/whatsnew/v0.23.1.txt | 2 ++ pandas/core/indexes/category.py | 16 +++++++++------- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 5464e7cba22c3..48f42621d183d 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -193,3 +193,16 @@ def time_categorical_series_is_monotonic_increasing(self): def time_categorical_series_is_monotonic_decreasing(self): self.s.is_monotonic_decreasing + + +class Contains(object): + + goal_time = 0.2 + + def setup(self): + N = 10**5 + self.ci = tm.makeCategoricalIndex(N) + self.cat = self.ci.categories[0] + + def time_contains(self): + self.cat in self.ci diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 9c29c34adb7dd..e23db591181fa 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -33,6 +33,8 @@ Performance Improvements - Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`) - Improved performance of :meth:`CategoricalIndex.is_unique` (:issue:`21107`) +- Improved performance of membership checks in :class:`CategoricalIndex` + (i.e. ``x in ci``-style checks are much faster). 
:meth:`CategoricalIndex.contains` is likewise much faster (:issue:`21369`) - - diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 150eca32e229d..9643d8c3ddc26 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -328,16 +328,18 @@ def __contains__(self, key): if self.categories._defer_to_indexing: return key in self.categories - return key in self.values + try: + code_value = self.categories.get_loc(key) + except KeyError: + if isna(key): + code_value = -1 + else: + return False + return code_value in self._engine @Appender(_index_shared_docs['contains'] % _index_doc_kwargs) def contains(self, key): - hash(key) - - if self.categories._defer_to_indexing: - return self.categories.contains(key) - - return key in self.values + return key in self def __array__(self, dtype=None): """ the array interface, return my values """