From 9cca3c5851d44cbaa28eb731ea81fe5d982ccbaa Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 23 Jan 2016 21:38:47 -0800 Subject: [PATCH 1/8] Split pandas.core.index into a pandas/indexes subpackage --- pandas/core/index.py | 7084 +----------------------------------- pandas/indexes/__init__.py | 0 pandas/indexes/api.py | 115 + pandas/indexes/base.py | 3309 +++++++++++++++++ pandas/indexes/category.py | 598 +++ pandas/indexes/float.py | 0 pandas/indexes/multi.py | 2166 +++++++++++ pandas/indexes/numeric.py | 369 ++ pandas/indexes/range.py | 623 ++++ 9 files changed, 7183 insertions(+), 7081 deletions(-) create mode 100644 pandas/indexes/__init__.py create mode 100644 pandas/indexes/api.py create mode 100644 pandas/indexes/base.py create mode 100644 pandas/indexes/category.py create mode 100644 pandas/indexes/float.py create mode 100644 pandas/indexes/multi.py create mode 100644 pandas/indexes/numeric.py create mode 100644 pandas/indexes/range.py diff --git a/pandas/core/index.py b/pandas/core/index.py index ad5ed86236e50..05f98d59a1f56 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7081 +1,3 @@ -# pylint: disable=E1101,E1103,W0232 -import datetime -import warnings -import operator -from functools import partial -from sys import getsizeof - -import numpy as np -import pandas.tslib as tslib -import pandas.lib as lib -import pandas.algos as _algos -import pandas.index as _index -from pandas.lib import Timestamp, Timedelta, is_datetime_array - -from pandas.compat import range, zip, lrange, lzip, u, map -from pandas import compat -from pandas.core import algorithms -from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, - IndexOpsMixin, PandasDelegate) -import pandas.core.base as base -from pandas.util.decorators import (Appender, Substitution, cache_readonly, - deprecate, deprecate_kwarg) -import pandas.core.common as com -from pandas.core.missing import _clean_reindex_fill_method -from pandas.core.common import (isnull, 
array_equivalent, is_dtype_equal, - is_object_dtype, is_datetimetz, ABCSeries, - ABCCategorical, ABCPeriodIndex, - _values_from_object, is_float, is_integer, - is_iterator, is_categorical_dtype, - _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, is_null_slice, - is_integer_dtype, is_int64_dtype) -from pandas.core.strings import StringAccessorMixin - -from pandas.core.config import get_option -from pandas.io.common import PerformanceWarning - -# simplify -default_pprint = lambda x, max_seq_items=None: \ - com.pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, - max_seq_items=max_seq_items) - -__all__ = ['Index'] - -_unsortable_types = frozenset(('mixed', 'mixed-integer')) - -_index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array') -_index_shared_docs = dict() - - -def _try_get_item(x): - try: - return x.item() - except AttributeError: - return x - - -class InvalidIndexError(Exception): - pass - - -_o_dtype = np.dtype(object) -_Identity = object - - -def _new_Index(cls, d): - """ This is called upon unpickling, rather than the default which doesn't - have arguments and breaks __new__ - """ - return cls.__new__(cls, **d) - - -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): - """ - Immutable ndarray implementing an ordered, sliceable set. 
The basic object - storing axis labels for all pandas objects - - Parameters - ---------- - data : array-like (1-dimensional) - dtype : NumPy dtype (default: object) - copy : bool - Make a copy of input ndarray - name : object - Name to be stored in the index - tupleize_cols : bool (default: True) - When True, attempt to create a MultiIndex if possible - - Notes - ----- - An Index instance can **only** contain hashable objects - """ - # To hand over control to subclasses - _join_precedence = 1 - - # Cython methods - _groupby = _algos.groupby_object - _arrmap = _algos.arrmap_object - _left_indexer_unique = _algos.left_join_indexer_unique_object - _left_indexer = _algos.left_join_indexer_object - _inner_indexer = _algos.inner_join_indexer_object - _outer_indexer = _algos.outer_join_indexer_object - _box_scalars = False - - _typ = 'index' - _data = None - _id = None - name = None - asi8 = None - _comparables = ['name'] - _attributes = ['name'] - _allow_index_ops = True - _allow_datetime_index_ops = False - _allow_period_index_ops = False - _is_numeric_dtype = False - _can_hold_na = True - - # prioritize current class for _shallow_copy_with_infer, - # used to infer integers as datetime-likes - _infer_as_myclass = False - - _engine_type = _index.ObjectEngine - - def __new__(cls, data=None, dtype=None, copy=False, name=None, - fastpath=False, tupleize_cols=True, **kwargs): - - if name is None and hasattr(data, 'name'): - name = data.name - - if fastpath: - return cls._simple_new(data, name) - - # range - if isinstance(data, RangeIndex): - return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) - elif isinstance(data, range): - return RangeIndex.from_range(data, copy=copy, dtype=dtype, - name=name) - - # categorical - if is_categorical_dtype(data) or is_categorical_dtype(dtype): - return CategoricalIndex(data, copy=copy, name=name, **kwargs) - - # index-like - elif isinstance(data, (np.ndarray, Index, ABCSeries)): - - if (issubclass(data.dtype.type, 
np.datetime64) or - is_datetimetz(data)): - from pandas.tseries.index import DatetimeIndex - result = DatetimeIndex(data, copy=copy, name=name, **kwargs) - if dtype is not None and _o_dtype == dtype: - return Index(result.to_pydatetime(), dtype=_o_dtype) - else: - return result - - elif issubclass(data.dtype.type, np.timedelta64): - from pandas.tseries.tdi import TimedeltaIndex - result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) - if dtype is not None and _o_dtype == dtype: - return Index(result.to_pytimedelta(), dtype=_o_dtype) - else: - return result - - if dtype is not None: - try: - data = np.array(data, dtype=dtype, copy=copy) - except (TypeError, ValueError): - pass - - # maybe coerce to a sub-class - from pandas.tseries.period import PeriodIndex - if isinstance(data, PeriodIndex): - return PeriodIndex(data, copy=copy, name=name, **kwargs) - if issubclass(data.dtype.type, np.integer): - return Int64Index(data, copy=copy, dtype=dtype, name=name) - elif issubclass(data.dtype.type, np.floating): - return Float64Index(data, copy=copy, dtype=dtype, name=name) - elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): - subarr = data.astype('object') - else: - subarr = com._asarray_tuplesafe(data, dtype=object) - - # _asarray_tuplesafe does not always copy underlying data, - # so need to make sure that this happens - if copy: - subarr = subarr.copy() - - if dtype is None: - inferred = lib.infer_dtype(subarr) - if inferred == 'integer': - return Int64Index(subarr.astype('i8'), copy=copy, - name=name) - elif inferred in ['floating', 'mixed-integer-float']: - return Float64Index(subarr, copy=copy, name=name) - elif inferred == 'boolean': - # don't support boolean explicity ATM - pass - elif inferred != 'string': - if (inferred.startswith('datetime') or - tslib.is_timestamp_array(subarr)): - - if (lib.is_datetime_with_singletz_array(subarr) or - 'tz' in kwargs): - # only when subarr has the same tz - from pandas.tseries.index import DatetimeIndex - 
return DatetimeIndex(subarr, copy=copy, name=name, - **kwargs) - - elif (inferred.startswith('timedelta') or - lib.is_timedelta_array(subarr)): - from pandas.tseries.tdi import TimedeltaIndex - return TimedeltaIndex(subarr, copy=copy, name=name, - **kwargs) - elif inferred == 'period': - return PeriodIndex(subarr, name=name, **kwargs) - return cls._simple_new(subarr, name) - - elif hasattr(data, '__array__'): - return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, - **kwargs) - elif data is None or np.isscalar(data): - cls._scalar_data_error(data) - else: - if (tupleize_cols and isinstance(data, list) and data and - isinstance(data[0], tuple)): - - # we must be all tuples, otherwise don't construct - # 10697 - if all(isinstance(e, tuple) for e in data): - try: - # must be orderable in py3 - if compat.PY3: - sorted(data) - return MultiIndex.from_tuples(data, names=name or - kwargs.get('names')) - except (TypeError, KeyError): - # python2 - MultiIndex fails on mixed types - pass - # other iterable of some kind - subarr = com._asarray_tuplesafe(data, dtype=object) - return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) - - """ - NOTE for new Index creation: - - - _simple_new: It returns new Index with the same type as the caller. - All metadata (such as name) must be provided by caller's responsibility. - Using _shallow_copy is recommended because it fills these metadata - otherwise specified. - - - _shallow_copy: It returns new Index with the same type (using - _simple_new), but fills caller's metadata otherwise specified. Passed - kwargs will overwrite corresponding metadata. - - - _shallow_copy_with_infer: It returns new Index inferring its type - from passed values. It fills caller's metadata otherwise specified as the - same as _shallow_copy. - - See each method's docstring. 
- """ - - @classmethod - def _simple_new(cls, values, name=None, dtype=None, **kwargs): - """ - we require the we have a dtype compat for the values - if we are passed a non-dtype compat, then coerce using the constructor - - Must be careful not to recurse. - """ - if not hasattr(values, 'dtype'): - if values is None and dtype is not None: - values = np.empty(0, dtype=dtype) - else: - values = np.array(values, copy=False) - if is_object_dtype(values): - values = cls(values, name=name, dtype=dtype, - **kwargs)._values - - result = object.__new__(cls) - result._data = values - result.name = name - for k, v in compat.iteritems(kwargs): - setattr(result, k, v) - result._reset_identity() - return result - - def _shallow_copy(self, values=None, **kwargs): - """ - create a new Index with the same class as the caller, don't copy the - data, use the same object attributes with passed in attributes taking - precedence - - *this is an internal non-public method* - - Parameters - ---------- - values : the values to create the new Index, optional - kwargs : updates the default attributes for this Index - """ - if values is None: - values = self.values - attributes = self._get_attributes_dict() - attributes.update(kwargs) - return self._simple_new(values, **attributes) - - def _shallow_copy_with_infer(self, values=None, **kwargs): - """ - create a new Index inferring the class with passed value, don't copy - the data, use the same object attributes with passed in attributes - taking precedence - - *this is an internal non-public method* - - Parameters - ---------- - values : the values to create the new Index, optional - kwargs : updates the default attributes for this Index - """ - if values is None: - values = self.values - attributes = self._get_attributes_dict() - attributes.update(kwargs) - attributes['copy'] = False - if self._infer_as_myclass: - try: - return self._constructor(values, **attributes) - except (TypeError, ValueError): - pass - return Index(values, 
**attributes) - - def _update_inplace(self, result, **kwargs): - # guard when called from IndexOpsMixin - raise TypeError("Index can't be updated inplace") - - def is_(self, other): - """ - More flexible, faster check like ``is`` but that works through views - - Note: this is *not* the same as ``Index.identical()``, which checks - that metadata is also the same. - - Parameters - ---------- - other : object - other object to compare against. - - Returns - ------- - True if both have same underlying data, False otherwise : bool - """ - # use something other than None to be clearer - return self._id is getattr( - other, '_id', Ellipsis) and self._id is not None - - def _reset_identity(self): - """Initializes or resets ``_id`` attribute with new object""" - self._id = _Identity() - - # ndarray compat - def __len__(self): - """ - return the length of the Index - """ - return len(self._data) - - def __array__(self, dtype=None): - """ the array interface, return my values """ - return self._data.view(np.ndarray) - - def __array_wrap__(self, result, context=None): - """ - Gets called after a ufunc - """ - if is_bool_dtype(result): - return result - - attrs = self._get_attributes_dict() - attrs = self._maybe_update_attributes(attrs) - return Index(result, **attrs) - - @cache_readonly - def dtype(self): - """ return the dtype object of the underlying data """ - return self._data.dtype - - @cache_readonly - def dtype_str(self): - """ return the dtype str of the underlying data """ - return str(self.dtype) - - @property - def values(self): - """ return the underlying data as an ndarray """ - return self._data.view(np.ndarray) - - def get_values(self): - """ return the underlying data as an ndarray """ - return self.values - - # ops compat - def tolist(self): - """ - return a list of the Index values - """ - return list(self.values) - - def repeat(self, n): - """ - return a new Index of the values repeated n times - - See also - -------- - numpy.ndarray.repeat - """ - return 
self._shallow_copy(self._values.repeat(n)) - - def ravel(self, order='C'): - """ - return an ndarray of the flattened values of the underlying data - - See also - -------- - numpy.ndarray.ravel - """ - return self._values.ravel(order=order) - - # construction helpers - @classmethod - def _scalar_data_error(cls, data): - raise TypeError('{0}(...) must be called with a collection of some ' - 'kind, {1} was passed'.format(cls.__name__, - repr(data))) - - @classmethod - def _string_data_error(cls, data): - raise TypeError('String dtype not supported, you may need ' - 'to explicitly cast to a numeric type') - - @classmethod - def _coerce_to_ndarray(cls, data): - """coerces data to ndarray, raises on scalar data. Converts other - iterables to list first and then to array. Does not touch ndarrays. - """ - - if not isinstance(data, (np.ndarray, Index)): - if data is None or np.isscalar(data): - cls._scalar_data_error(data) - - # other iterable of some kind - if not isinstance(data, (ABCSeries, list, tuple)): - data = list(data) - data = np.asarray(data) - return data - - def _get_attributes_dict(self): - """ return an attributes dict for my class """ - return dict([(k, getattr(self, k, None)) for k in self._attributes]) - - def view(self, cls=None): - - # we need to see if we are subclassing an - # index type here - if cls is not None and not hasattr(cls, '_typ'): - result = self._data.view(cls) - else: - result = self._shallow_copy() - if isinstance(result, Index): - result._id = self._id - return result - - def _coerce_scalar_to_index(self, item): - """ - we need to coerce a scalar to a compat for our index type - - Parameters - ---------- - item : scalar item to coerce - """ - return Index([item], dtype=self.dtype, **self._get_attributes_dict()) - - _index_shared_docs['copy'] = """ - Make a copy of this object. Name and dtype sets those attributes on - the new object. 
- - Parameters - ---------- - name : string, optional - deep : boolean, default False - dtype : numpy dtype or pandas type - - Returns - ------- - copy : Index - - Notes - ----- - In most cases, there should be no functional difference from using - ``deep``, but if ``deep`` is passed it will attempt to deepcopy. - """ - - @Appender(_index_shared_docs['copy']) - def copy(self, name=None, deep=False, dtype=None, **kwargs): - names = kwargs.get('names') - if names is not None and name is not None: - raise TypeError("Can only provide one of `names` and `name`") - if deep: - from copy import deepcopy - new_index = self._shallow_copy(self._data.copy()) - name = name or deepcopy(self.name) - else: - new_index = self._shallow_copy() - name = self.name - if name is not None: - names = [name] - if names: - new_index = new_index.set_names(names) - if dtype: - new_index = new_index.astype(dtype) - return new_index - - __copy__ = copy - - def __unicode__(self): - """ - Return a string representation for this object. - - Invoked by unicode(df) in py2 only. Yields a Unicode String in both - py2/py3. 
- """ - klass = self.__class__.__name__ - data = self._format_data() - attrs = self._format_attrs() - space = self._format_space() - - prepr = (u(",%s") % - space).join([u("%s=%s") % (k, v) for k, v in attrs]) - - # no data provided, just attributes - if data is None: - data = '' - - res = u("%s(%s%s)") % (klass, data, prepr) - - return res - - def _format_space(self): - - # using space here controls if the attributes - # are line separated or not (the default) - - # max_seq_items = get_option('display.max_seq_items') - # if len(self) > max_seq_items: - # space = "\n%s" % (' ' * (len(klass) + 1)) - return " " - - @property - def _formatter_func(self): - """ - Return the formatted data as a unicode string - """ - return default_pprint - - def _format_data(self): - """ - Return the formatted data as a unicode string - """ - from pandas.core.format import get_console_size, _get_adjustment - display_width, _ = get_console_size() - if display_width is None: - display_width = get_option('display.width') or 80 - - space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) - - n = len(self) - sep = ',' - max_seq_items = get_option('display.max_seq_items') or n - formatter = self._formatter_func - - # do we want to justify (only do so for non-objects) - is_justify = not (self.inferred_type in ('string', 'unicode') or - (self.inferred_type == 'categorical' and - is_object_dtype(self.categories))) - - # are we a truncated display - is_truncated = n > max_seq_items - - # adj can optionaly handle unicode eastern asian width - adj = _get_adjustment() - - def _extend_line(s, line, value, display_width, next_line_prefix): - - if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= - display_width): - s += line.rstrip() - line = next_line_prefix - line += value - return s, line - - def best_len(values): - if values: - return max([adj.len(x) for x in values]) - else: - return 0 - - if n == 0: - summary = '[], ' - elif 
n == 1: - first = formatter(self[0]) - summary = '[%s], ' % first - elif n == 2: - first = formatter(self[0]) - last = formatter(self[-1]) - summary = '[%s, %s], ' % (first, last) - else: - - if n > max_seq_items: - n = min(max_seq_items // 2, 10) - head = [formatter(x) for x in self[:n]] - tail = [formatter(x) for x in self[-n:]] - else: - head = [] - tail = [formatter(x) for x in self] - - # adjust all values to max length if needed - if is_justify: - - # however, if we are not truncated and we are only a single - # line, then don't justify - if (is_truncated or - not (len(', '.join(head)) < display_width and - len(', '.join(tail)) < display_width)): - max_len = max(best_len(head), best_len(tail)) - head = [x.rjust(max_len) for x in head] - tail = [x.rjust(max_len) for x in tail] - - summary = "" - line = space2 - - for i in range(len(head)): - word = head[i] + sep + ' ' - summary, line = _extend_line(summary, line, word, - display_width, space2) - - if is_truncated: - # remove trailing space of last line - summary += line.rstrip() + space2 + '...' 
- line = space2 - - for i in range(len(tail) - 1): - word = tail[i] + sep + ' ' - summary, line = _extend_line(summary, line, word, - display_width, space2) - - # last value: no sep added + 1 space of width used for trailing ',' - summary, line = _extend_line(summary, line, tail[-1], - display_width - 2, space2) - summary += line - summary += '],' - - if len(summary) > (display_width): - summary += space1 - else: # one row - summary += ' ' - - # remove initial space - summary = '[' + summary[len(space2):] - - return summary - - def _format_attrs(self): - """ - Return a list of tuples of the (attr,formatted_value) - """ - attrs = [] - attrs.append(('dtype', "'%s'" % self.dtype)) - if self.name is not None: - attrs.append(('name', default_pprint(self.name))) - max_seq_items = get_option('display.max_seq_items') or len(self) - if len(self) > max_seq_items: - attrs.append(('length', len(self))) - return attrs - - def to_series(self, **kwargs): - """ - Create a Series with both index and values equal to the index keys - useful with map for returning an indexer based on an index - - Returns - ------- - Series : dtype will be based on the type of the Index values. 
- """ - - from pandas import Series - return Series(self._to_embed(), index=self, name=self.name) - - def _to_embed(self, keep_tz=False): - """ - *this is an internal non-public method* - - return an array repr of this object, potentially casting to object - - """ - return self.values.copy() - - def astype(self, dtype): - return Index(self.values.astype(dtype), name=self.name, dtype=dtype) - - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self - - def to_datetime(self, dayfirst=False): - """ - For an Index containing strings or datetime.datetime objects, attempt - conversion to DatetimeIndex - """ - from pandas.tseries.index import DatetimeIndex - if self.inferred_type == 'string': - from dateutil.parser import parse - parser = lambda x: parse(x, dayfirst=dayfirst) - parsed = lib.try_parse_dates(self.values, parser=parser) - return DatetimeIndex(parsed) - else: - return DatetimeIndex(self.values) - - def _assert_can_do_setop(self, other): - if not com.is_list_like(other): - raise TypeError('Input must be Index or array-like') - return True - - def _convert_can_do_setop(self, other): - if not isinstance(other, Index): - other = Index(other, name=self.name) - result_name = self.name - else: - result_name = self.name if self.name == other.name else None - return other, result_name - - @property - def nlevels(self): - return 1 - - def _get_names(self): - return FrozenList((self.name, )) - - def _set_names(self, values, level=None): - if len(values) != 1: - raise ValueError('Length of new names must be 1, got %d' % - len(values)) - self.name = values[0] - - names = property(fset=_set_names, fget=_get_names) - - def set_names(self, names, level=None, inplace=False): - """ - Set new names on index. Defaults to returning new index. 
- - Parameters - ---------- - names : str or sequence - name(s) to set - level : int, level name, or sequence of int/level names (default None) - If the index is a MultiIndex (hierarchical), level(s) to set (None - for all levels). Otherwise level must be None - inplace : bool - if True, mutates in place - - Returns - ------- - new index (of same type and class...etc) [if inplace, returns None] - - Examples - -------- - >>> Index([1, 2, 3, 4]).set_names('foo') - Int64Index([1, 2, 3, 4], dtype='int64') - >>> Index([1, 2, 3, 4]).set_names(['foo']) - Int64Index([1, 2, 3, 4], dtype='int64') - >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')], - names=['foo', 'bar']) - >>> idx.set_names(['baz', 'quz']) - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'baz', u'quz']) - >>> idx.set_names('baz', level=0) - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'baz', u'bar']) - """ - if level is not None and self.nlevels == 1: - raise ValueError('Level must be None for non-MultiIndex') - - if level is not None and not is_list_like(level) and is_list_like( - names): - raise TypeError("Names must be a string") - - if not is_list_like(names) and level is None and self.nlevels > 1: - raise TypeError("Must pass list-like as `names`.") - - if not is_list_like(names): - names = [names] - if level is not None and not is_list_like(level): - level = [level] - - if inplace: - idx = self - else: - idx = self._shallow_copy() - idx._set_names(names, level=level) - if not inplace: - return idx - - def rename(self, name, inplace=False): - """ - Set new names on index. Defaults to returning new index. 
- - Parameters - ---------- - name : str or list - name to set - inplace : bool - if True, mutates in place - - Returns - ------- - new index (of same type and class...etc) [if inplace, returns None] - """ - return self.set_names([name], inplace=inplace) - - @property - def _has_complex_internals(self): - # to disable groupby tricks in MultiIndex - return False - - def summary(self, name=None): - if len(self) > 0: - head = self[0] - if (hasattr(head, 'format') and - not isinstance(head, compat.string_types)): - head = head.format() - tail = self[-1] - if (hasattr(tail, 'format') and - not isinstance(tail, compat.string_types)): - tail = tail.format() - index_summary = ', %s to %s' % (com.pprint_thing(head), - com.pprint_thing(tail)) - else: - index_summary = '' - - if name is None: - name = type(self).__name__ - return '%s: %s entries%s' % (name, len(self), index_summary) - - def _mpl_repr(self): - # how to represent ourselves to matplotlib - return self.values - - _na_value = np.nan - """The expected NA value to use with this index.""" - - @property - def is_monotonic(self): - """ alias for is_monotonic_increasing (deprecated) """ - return self._engine.is_monotonic_increasing - - @property - def is_monotonic_increasing(self): - """ - return if the index is monotonic increasing (only equal or - increasing) values. - """ - return self._engine.is_monotonic_increasing - - @property - def is_monotonic_decreasing(self): - """ - return if the index is monotonic decreasing (only equal or - decreasing) values. 
- """ - return self._engine.is_monotonic_decreasing - - def is_lexsorted_for_tuple(self, tup): - return True - - @cache_readonly(allow_setting=True) - def is_unique(self): - """ return if the index has unique values """ - return self._engine.is_unique - - @property - def has_duplicates(self): - return not self.is_unique - - def is_boolean(self): - return self.inferred_type in ['boolean'] - - def is_integer(self): - return self.inferred_type in ['integer'] - - def is_floating(self): - return self.inferred_type in ['floating', 'mixed-integer-float'] - - def is_numeric(self): - return self.inferred_type in ['integer', 'floating'] - - def is_object(self): - return is_object_dtype(self.dtype) - - def is_categorical(self): - return self.inferred_type in ['categorical'] - - def is_mixed(self): - return 'mixed' in self.inferred_type - - def holds_integer(self): - return self.inferred_type in ['integer', 'mixed-integer'] - - def _convert_scalar_indexer(self, key, kind=None): - """ - convert a scalar indexer - - Parameters - ---------- - key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) - - right now we are converting - floats -> ints if the index supports it - """ - - def to_int(): - ikey = int(key) - if ikey != key: - return self._invalid_indexer('label', key) - return ikey - - if kind == 'iloc': - if is_integer(key): - return key - elif is_float(key): - key = to_int() - warnings.warn("scalar indexers for index type {0} should be " - "integers and not floating point".format( - type(self).__name__), - FutureWarning, stacklevel=5) - return key - return self._invalid_indexer('label', key) - - if is_float(key): - if isnull(key): - return self._invalid_indexer('label', key) - warnings.warn("scalar indexers for index type {0} should be " - "integers and not floating point".format( - type(self).__name__), - FutureWarning, stacklevel=3) - return to_int() - - return key - - def _convert_slice_indexer_getitem(self, key, 
is_index_slice=False): - """ called from the getitem slicers, determine how to treat the key - whether positional or not """ - if self.is_integer() or is_index_slice: - return key - return self._convert_slice_indexer(key) - - def _convert_slice_indexer(self, key, kind=None): - """ - convert a slice indexer. disallow floats in the start/stop/step - - Parameters - ---------- - key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) - """ - - # if we are not a slice, then we are done - if not isinstance(key, slice): - return key - - # validate iloc - if kind == 'iloc': - - # need to coerce to_int if needed - def f(c): - v = getattr(key, c) - if v is None or is_integer(v): - return v - - # warn if it's a convertible float - if v == int(v): - warnings.warn("slice indexers when using iloc should be " - "integers and not floating point", - FutureWarning, stacklevel=7) - return int(v) - - self._invalid_indexer('slice {0} value'.format(c), v) - - return slice(*[f(c) for c in ['start', 'stop', 'step']]) - - # validate slicers - def validate(v): - if v is None or is_integer(v): - return True - - # dissallow floats (except for .ix) - elif is_float(v): - if kind == 'ix': - return True - - return False - - return True - - for c in ['start', 'stop', 'step']: - v = getattr(key, c) - if not validate(v): - self._invalid_indexer('slice {0} value'.format(c), v) - - # figure out if this is a positional indexer - start, stop, step = key.start, key.stop, key.step - - def is_int(v): - return v is None or is_integer(v) - - is_null_slicer = start is None and stop is None - is_index_slice = is_int(start) and is_int(stop) - is_positional = is_index_slice and not self.is_integer() - - if kind == 'getitem': - return self._convert_slice_indexer_getitem( - key, is_index_slice=is_index_slice) - - # convert the slice to an indexer here - - # if we are mixed and have integers - try: - if is_positional and self.is_mixed(): - # TODO: i, j are not used 
anywhere - if start is not None: - i = self.get_loc(start) # noqa - if stop is not None: - j = self.get_loc(stop) # noqa - is_positional = False - except KeyError: - if self.inferred_type == 'mixed-integer-float': - raise - - if is_null_slicer: - indexer = key - elif is_positional: - indexer = key - else: - try: - indexer = self.slice_indexer(start, stop, step) - except Exception: - if is_index_slice: - if self.is_integer(): - raise - else: - indexer = key - else: - raise - - return indexer - - def _convert_list_indexer(self, keyarr, kind=None): - """ - passed a key that is tuplesafe that is integer based - and we have a mixed index (e.g. number/labels). figure out - the indexer. return None if we can't help - """ - if (kind in [None, 'iloc', 'ix'] and - is_integer_dtype(keyarr) and not self.is_floating() and - not isinstance(keyarr, ABCPeriodIndex)): - - if self.inferred_type == 'mixed-integer': - indexer = self.get_indexer(keyarr) - if (indexer >= 0).all(): - return indexer - # missing values are flagged as -1 by get_indexer and negative - # indices are already converted to positive indices in the - # above if-statement, so the negative flags are changed to - # values outside the range of indices so as to trigger an - # IndexError in maybe_convert_indices - indexer[indexer < 0] = len(self) - from pandas.core.indexing import maybe_convert_indices - return maybe_convert_indices(indexer, len(self)) - - elif not self.inferred_type == 'integer': - keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) - return keyarr - - return None - - def _invalid_indexer(self, form, key): - """ consistent invalid indexer message """ - raise TypeError("cannot do {form} indexing on {klass} with these " - "indexers [{key}] of {kind}".format( - form=form, klass=type(self), key=key, - kind=type(key))) - - def get_duplicates(self): - from collections import defaultdict - counter = defaultdict(lambda: 0) - for k in self.values: - counter[k] += 1 - return sorted(k for k, v in 
compat.iteritems(counter) if v > 1) - - _get_duplicates = get_duplicates - - def _cleanup(self): - self._engine.clear_mapping() - - @cache_readonly - def _constructor(self): - return type(self) - - @cache_readonly - def _engine(self): - # property, for now, slow to look up - return self._engine_type(lambda: self.values, len(self)) - - def _validate_index_level(self, level): - """ - Validate index level. - - For single-level Index getting level number is a no-op, but some - verification must be done like in MultiIndex. - - """ - if isinstance(level, int): - if level < 0 and level != -1: - raise IndexError("Too many levels: Index has only 1 level," - " %d is not a valid level number" % (level, )) - elif level > 0: - raise IndexError("Too many levels:" - " Index has only 1 level, not %d" % - (level + 1)) - elif level != self.name: - raise KeyError('Level %s must be same as name (%s)' % - (level, self.name)) - - def _get_level_number(self, level): - self._validate_index_level(level) - return 0 - - @cache_readonly - def inferred_type(self): - """ return a string of the type inferred from the values """ - return lib.infer_dtype(self) - - def is_type_compatible(self, kind): - return kind == self.inferred_type - - @cache_readonly - def is_all_dates(self): - if self._data is None: - return False - return is_datetime_array(_ensure_object(self.values)) - - def __iter__(self): - return iter(self.values) - - def __reduce__(self): - d = dict(data=self._data) - d.update(self._get_attributes_dict()) - return _new_Index, (self.__class__, d), None - - def __setstate__(self, state): - """Necessary for making this object picklable""" - - if isinstance(state, dict): - self._data = state.pop('data') - for k, v in compat.iteritems(state): - setattr(self, k, v) - - elif isinstance(state, tuple): - - if len(state) == 2: - nd_state, own_state = state - data = np.empty(nd_state[1], dtype=nd_state[2]) - np.ndarray.__setstate__(data, nd_state) - self.name = own_state[0] - - else: # pragma: no 
cover - data = np.empty(state) - np.ndarray.__setstate__(data, state) - - self._data = data - self._reset_identity() - else: - raise Exception("invalid pickle state") - - _unpickle_compat = __setstate__ - - def __deepcopy__(self, memo=None): - if memo is None: - memo = {} - return self.copy(deep=True) - - def __nonzero__(self): - raise ValueError("The truth value of a {0} is ambiguous. " - "Use a.empty, a.bool(), a.item(), a.any() or a.all()." - .format(self.__class__.__name__)) - - __bool__ = __nonzero__ - - def __contains__(self, key): - hash(key) - # work around some kind of odd cython bug - try: - return key in self._engine - except TypeError: - return False - - def __hash__(self): - raise TypeError("unhashable type: %r" % type(self).__name__) - - def __setitem__(self, key, value): - raise TypeError("Index does not support mutable operations") - - def __getitem__(self, key): - """ - Override numpy.ndarray's __getitem__ method to work as desired. - - This function adds lists and Series as valid boolean indexers - (ndarrays only supports ndarray with dtype=bool). - - If resulting ndim != 1, plain ndarray is returned instead of - corresponding `Index` subclass. - - """ - # There's no custom logic to be implemented in __getslice__, so it's - # not overloaded intentionally. - getitem = self._data.__getitem__ - promote = self._shallow_copy - - if np.isscalar(key): - return getitem(key) - - if isinstance(key, slice): - # This case is separated from the conditional above to avoid - # pessimization of basic indexing. 
- return promote(getitem(key)) - - if is_bool_indexer(key): - key = np.asarray(key) - - key = _values_from_object(key) - result = getitem(key) - if not np.isscalar(result): - return promote(result) - else: - return result - - def _ensure_compat_append(self, other): - """ - prepare the append - - Returns - ------- - list of to_concat, name of result Index - """ - name = self.name - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - for obj in to_concat: - if (isinstance(obj, Index) and obj.name != name and - obj.name is not None): - name = None - break - - to_concat = self._ensure_compat_concat(to_concat) - to_concat = [x._values if isinstance(x, Index) else x - for x in to_concat] - return to_concat, name - - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : Index - """ - to_concat, name = self._ensure_compat_append(other) - attribs = self._get_attributes_dict() - attribs['name'] = name - return self._shallow_copy_with_infer( - np.concatenate(to_concat), **attribs) - - @staticmethod - def _ensure_compat_concat(indexes): - from pandas.tseries.api import (DatetimeIndex, PeriodIndex, - TimedeltaIndex) - klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex - - is_ts = [isinstance(idx, klasses) for idx in indexes] - - if any(is_ts) and not all(is_ts): - return [_maybe_box(idx) for idx in indexes] - - return indexes - - def take(self, indices, axis=0, allow_fill=True, fill_value=None): - """ - return a new Index of the values selected by the indexer - - For internal compatibility with numpy arrays. 
- - # filling must always be None/nan here - # but is passed thru internally - - See also - -------- - numpy.ndarray.take - """ - - indices = com._ensure_platform_int(indices) - taken = self.values.take(indices) - return self._shallow_copy(taken) - - @cache_readonly - def _isnan(self): - """ return if each value is nan""" - if self._can_hold_na: - return isnull(self) - else: - # shouldn't reach to this condition by checking hasnans beforehand - values = np.empty(len(self), dtype=np.bool_) - values.fill(False) - return values - - @cache_readonly - def _nan_idxs(self): - if self._can_hold_na: - w, = self._isnan.nonzero() - return w - else: - return np.array([], dtype=np.int64) - - @cache_readonly - def hasnans(self): - """ return if I have any nans; enables various perf speedups """ - if self._can_hold_na: - return self._isnan.any() - else: - return False - - def _convert_for_op(self, value): - """ Convert value to be insertable to ndarray """ - return value - - def _assert_can_do_op(self, value): - """ Check value is valid for scalar op """ - if not lib.isscalar(value): - msg = "'value' must be a scalar, passed: {0}" - raise TypeError(msg.format(type(value).__name__)) - - def putmask(self, mask, value): - """ - return a new Index of the values set with the mask - - See also - -------- - numpy.ndarray.putmask - """ - values = self.values.copy() - try: - np.putmask(values, mask, self._convert_for_op(value)) - return self._shallow_copy(values) - except (ValueError, TypeError): - # coerces to object - return self.astype(object).putmask(mask, value) - - def format(self, name=False, formatter=None, **kwargs): - """ - Render a string representation of the Index - """ - header = [] - if name: - header.append(com.pprint_thing(self.name, - escape_chars=('\t', '\r', '\n')) if - self.name is not None else '') - - if formatter is not None: - return header + list(self.map(formatter)) - - return self._format_with_header(header, **kwargs) - - def _format_with_header(self, header, 
na_rep='NaN', **kwargs): - values = self.values - - from pandas.core.format import format_array - - if is_categorical_dtype(values.dtype): - values = np.array(values) - elif is_object_dtype(values.dtype): - values = lib.maybe_convert_objects(values, safe=1) - - if is_object_dtype(values.dtype): - result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) - for x in values] - - # could have nans - mask = isnull(values) - if mask.any(): - result = np.array(result) - result[mask] = na_rep - result = result.tolist() - - else: - result = _trim_front(format_array(values, None, justify='left')) - return header + result - - def to_native_types(self, slicer=None, **kwargs): - """ slice and dice then format """ - values = self - if slicer is not None: - values = values[slicer] - return values._format_native_types(**kwargs) - - def _format_native_types(self, na_rep='', quoting=None, **kwargs): - """ actually format my specific types """ - mask = isnull(self) - if not self.is_object() and not quoting: - values = np.asarray(self).astype(str) - else: - values = np.array(self, dtype=object, copy=True) - - values[mask] = na_rep - return values - - def equals(self, other): - """ - Determines if two Index objects contain the same elements. - """ - if self.is_(other): - return True - - if not isinstance(other, Index): - return False - - return array_equivalent(_values_from_object(self), - _values_from_object(other)) - - def identical(self, other): - """Similar to equals, but check that other comparable attributes are - also equal - """ - return (self.equals(other) and - all((getattr(self, c, None) == getattr(other, c, None) - for c in self._comparables)) and - type(self) == type(other)) - - def asof(self, label): - """ - For a sorted index, return the most recent label up to and including - the passed label. Return NaN if not found. 
- - See also - -------- - get_loc : asof is a thin wrapper around get_loc with method='pad' - """ - try: - loc = self.get_loc(label, method='pad') - except KeyError: - return _get_na_value(self.dtype) - else: - if isinstance(loc, slice): - loc = loc.indices(len(self))[-1] - return self[loc] - - def asof_locs(self, where, mask): - """ - where : array of timestamps - mask : array of booleans where data is not NA - - """ - locs = self.values[mask].searchsorted(where.values, side='right') - - locs = np.where(locs > 0, locs - 1, 0) - result = np.arange(len(self))[mask].take(locs) - - first = mask.argmax() - result[(locs == 0) & (where < self.values[first])] = -1 - - return result - - def sort_values(self, return_indexer=False, ascending=True): - """ - Return sorted copy of Index - """ - _as = self.argsort() - if not ascending: - _as = _as[::-1] - - sorted_index = self.take(_as) - - if return_indexer: - return sorted_index, _as - else: - return sorted_index - - def order(self, return_indexer=False, ascending=True): - """ - Return sorted copy of Index - - DEPRECATED: use :meth:`Index.sort_values` - """ - warnings.warn("order is deprecated, use sort_values(...)", - FutureWarning, stacklevel=2) - return self.sort_values(return_indexer=return_indexer, - ascending=ascending) - - def sort(self, *args, **kwargs): - raise TypeError("cannot sort an Index object in-place, use " - "sort_values instead") - - def sortlevel(self, level=None, ascending=True, sort_remaining=None): - """ - - For internal compatibility with with the Index API - - Sort the Index. 
This is for compat with MultiIndex - - Parameters - ---------- - ascending : boolean, default True - False to sort in descending order - - level, sort_remaining are compat parameters - - Returns - ------- - sorted_index : Index - """ - return self.sort_values(return_indexer=True, ascending=ascending) - - def shift(self, periods=1, freq=None): - """ - Shift Index containing datetime objects by input number of periods and - DateOffset - - Returns - ------- - shifted : Index - """ - raise NotImplementedError("Not supported for type %s" % - type(self).__name__) - - def argsort(self, *args, **kwargs): - """ - return an ndarray indexer of the underlying data - - See also - -------- - numpy.ndarray.argsort - """ - result = self.asi8 - if result is None: - result = np.array(self) - return result.argsort(*args, **kwargs) - - def __add__(self, other): - if com.is_list_like(other): - warnings.warn("using '+' to provide set union with Indexes is " - "deprecated, use '|' or .union()", FutureWarning, - stacklevel=2) - if isinstance(other, Index): - return self.union(other) - return Index(np.array(self) + other) - - def __radd__(self, other): - if is_list_like(other): - warnings.warn("using '+' to provide set union with Indexes is " - "deprecated, use '|' or .union()", FutureWarning, - stacklevel=2) - return Index(other + np.array(self)) - - __iadd__ = __add__ - - def __sub__(self, other): - warnings.warn("using '-' to provide set differences with Indexes is " - "deprecated, use .difference()", FutureWarning, - stacklevel=2) - return self.difference(other) - - def __and__(self, other): - return self.intersection(other) - - def __or__(self, other): - return self.union(other) - - def __xor__(self, other): - return self.sym_diff(other) - - def union(self, other): - """ - Form the union of two Index objects and sorts if possible. 
- - Parameters - ---------- - other : Index or array-like - - Returns - ------- - union : Index - - Examples - -------- - - >>> idx1 = pd.Index([1, 2, 3, 4]) - >>> idx2 = pd.Index([3, 4, 5, 6]) - >>> idx1.union(idx2) - Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') - - """ - self._assert_can_do_setop(other) - other = _ensure_index(other) - - if len(other) == 0 or self.equals(other): - return self - - if len(self) == 0: - return other - - if not is_dtype_equal(self.dtype, other.dtype): - this = self.astype('O') - other = other.astype('O') - return this.union(other) - - if self.is_monotonic and other.is_monotonic: - try: - result = self._outer_indexer(self.values, other._values)[0] - except TypeError: - # incomparable objects - result = list(self.values) - - # worth making this faster? a very unusual case - value_set = set(self.values) - result.extend([x for x in other._values if x not in value_set]) - else: - indexer = self.get_indexer(other) - indexer, = (indexer == -1).nonzero() - - if len(indexer) > 0: - other_diff = com.take_nd(other._values, indexer, - allow_fill=False) - result = com._concat_compat((self.values, other_diff)) - - try: - self.values[0] < other_diff[0] - except TypeError as e: - warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, RuntimeWarning, - stacklevel=3) - else: - types = frozenset((self.inferred_type, - other.inferred_type)) - if not types & _unsortable_types: - result.sort() - - else: - result = self.values - - try: - result = np.sort(result) - except TypeError as e: - warnings.warn("%s, sort order is undefined for " - "incomparable objects" % e, RuntimeWarning, - stacklevel=3) - - # for subclasses - return self._wrap_union_result(other, result) - - def _wrap_union_result(self, other, result): - name = self.name if self.name == other.name else None - return self.__class__(result, name=name) - - def intersection(self, other): - """ - Form the intersection of two Index objects. 
- - This returns a new Index with elements common to the index and `other`. - Sortedness of the result is not guaranteed. - - Parameters - ---------- - other : Index or array-like - - Returns - ------- - intersection : Index - - Examples - -------- - - >>> idx1 = pd.Index([1, 2, 3, 4]) - >>> idx2 = pd.Index([3, 4, 5, 6]) - >>> idx1.intersection(idx2) - Int64Index([3, 4], dtype='int64') - - """ - self._assert_can_do_setop(other) - other = _ensure_index(other) - - if self.equals(other): - return self - - if not is_dtype_equal(self.dtype, other.dtype): - this = self.astype('O') - other = other.astype('O') - return this.intersection(other) - - if self.is_monotonic and other.is_monotonic: - try: - result = self._inner_indexer(self.values, other._values)[0] - return self._wrap_union_result(other, result) - except TypeError: - pass - - try: - indexer = Index(self.values).get_indexer(other._values) - indexer = indexer.take((indexer != -1).nonzero()[0]) - except: - # duplicates - indexer = Index(self.values).get_indexer_non_unique( - other._values)[0].unique() - indexer = indexer[indexer != -1] - - taken = self.take(indexer) - if self.name != other.name: - taken.name = None - return taken - - def difference(self, other): - """ - Return a new Index with elements from the index that are not in - `other`. - - This is the sorted set difference of two Index objects. 
- - Parameters - ---------- - other : Index or array-like - - Returns - ------- - difference : Index - - Examples - -------- - - >>> idx1 = pd.Index([1, 2, 3, 4]) - >>> idx2 = pd.Index([3, 4, 5, 6]) - >>> idx1.difference(idx2) - Int64Index([1, 2], dtype='int64') - - """ - self._assert_can_do_setop(other) - - if self.equals(other): - return Index([], name=self.name) - - other, result_name = self._convert_can_do_setop(other) - - theDiff = sorted(set(self) - set(other)) - return Index(theDiff, name=result_name) - - diff = deprecate('diff', difference) - - def sym_diff(self, other, result_name=None): - """ - Compute the sorted symmetric difference of two Index objects. - - Parameters - ---------- - other : Index or array-like - result_name : str - - Returns - ------- - sym_diff : Index - - Notes - ----- - ``sym_diff`` contains elements that appear in either ``idx1`` or - ``idx2`` but not both. Equivalent to the Index created by - ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped. - - The sorting of a result containing ``NaN`` values is not guaranteed - across Python versions. See GitHub issue #6444. - - Examples - -------- - >>> idx1 = Index([1, 2, 3, 4]) - >>> idx2 = Index([2, 3, 4, 5]) - >>> idx1.sym_diff(idx2) - Int64Index([1, 5], dtype='int64') - - You can also use the ``^`` operator: - - >>> idx1 ^ idx2 - Int64Index([1, 5], dtype='int64') - """ - self._assert_can_do_setop(other) - other, result_name_update = self._convert_can_do_setop(other) - if result_name is None: - result_name = result_name_update - - the_diff = sorted(set((self.difference(other)). 
- union(other.difference(self)))) - attribs = self._get_attributes_dict() - attribs['name'] = result_name - if 'freq' in attribs: - attribs['freq'] = None - return self._shallow_copy_with_infer(the_diff, **attribs) - - def get_loc(self, key, method=None, tolerance=None): - """ - Get integer location for requested label - - Parameters - ---------- - key : label - method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional - * default: exact matches only. - * pad / ffill: find the PREVIOUS index value if no exact match. - * backfill / bfill: use NEXT index value if no exact match - * nearest: use the NEAREST index value if no exact match. Tied - distances are broken by preferring the larger index value. - tolerance : optional - Maximum distance from index value for inexact matches. The value of - the index at the matching location most satisfy the equation - ``abs(index[loc] - key) <= tolerance``. - - .. versionadded:: 0.17.0 - - Returns - ------- - loc : int if unique index, possibly slice or mask if not - """ - if method is None: - if tolerance is not None: - raise ValueError('tolerance argument only valid if using pad, ' - 'backfill or nearest lookups') - key = _values_from_object(key) - return self._engine.get_loc(key) - - indexer = self.get_indexer([key], method=method, tolerance=tolerance) - if indexer.ndim > 1 or indexer.size > 1: - raise TypeError('get_loc requires scalar valued input') - loc = indexer.item() - if loc == -1: - raise KeyError(key) - return loc - - def get_value(self, series, key): - """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing - """ - - # if we have something that is Index-like, then - # use this, e.g. 
DatetimeIndex - s = getattr(series, '_values', None) - if isinstance(s, Index) and lib.isscalar(key): - return s[key] - - s = _values_from_object(series) - k = _values_from_object(key) - - # prevent integer truncation bug in indexing - if is_float(k) and not self.is_floating(): - raise KeyError - - try: - return self._engine.get_value(s, k) - except KeyError as e1: - if len(self) > 0 and self.inferred_type in ['integer', 'boolean']: - raise - - try: - return tslib.get_value_box(s, key) - except IndexError: - raise - except TypeError: - # generator/iterator-like - if is_iterator(key): - raise InvalidIndexError(key) - else: - raise e1 - except Exception: # pragma: no cover - raise e1 - except TypeError: - # python 3 - if np.isscalar(key): # pragma: no cover - raise IndexError(key) - raise InvalidIndexError(key) - - def set_value(self, arr, key, value): - """ - Fast lookup of value from 1-dimensional ndarray. Only use this if you - know what you're doing - """ - self._engine.set_value(_values_from_object(arr), - _values_from_object(key), value) - - def get_level_values(self, level): - """ - Return vector of label values for requested level, equal to the length - of the index - - Parameters - ---------- - level : int - - Returns - ------- - values : ndarray - """ - # checks that level number is actually just 1 - self._validate_index_level(level) - return self - - def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. - - Parameters - ---------- - target : Index - method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional - * default: exact matches only. - * pad / ffill: find the PREVIOUS index value if no exact match. - * backfill / bfill: use NEXT index value if no exact match - * nearest: use the NEAREST index value if no exact match. 
Tied - distances are broken by preferring the larger index value. - limit : int, optional - Maximum number of consecutive labels in ``target`` to match for - inexact matches. - tolerance : optional - Maximum distance between original and new labels for inexact - matches. The values of the index at the matching locations most - satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - - .. versionadded:: 0.17.0 - - Examples - -------- - >>> indexer = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - - Returns - ------- - indexer : ndarray of int - Integers from 0 to n - 1 indicating that the index at these - positions matches the corresponding target values. Missing values - in the target are marked by -1. - """ - method = _clean_reindex_fill_method(method) - target = _ensure_index(target) - if tolerance is not None: - tolerance = self._convert_tolerance(tolerance) - - pself, ptarget = self._possibly_promote(target) - if pself is not self or ptarget is not target: - return pself.get_indexer(ptarget, method=method, limit=limit, - tolerance=tolerance) - - if not is_dtype_equal(self.dtype, target.dtype): - this = self.astype(object) - target = target.astype(object) - return this.get_indexer(target, method=method, limit=limit, - tolerance=tolerance) - - if not self.is_unique: - raise InvalidIndexError('Reindexing only valid with uniquely' - ' valued Index objects') - - if method == 'pad' or method == 'backfill': - indexer = self._get_fill_indexer(target, method, limit, tolerance) - elif method == 'nearest': - indexer = self._get_nearest_indexer(target, limit, tolerance) - else: - if tolerance is not None: - raise ValueError('tolerance argument only valid if doing pad, ' - 'backfill or nearest reindexing') - if limit is not None: - raise ValueError('limit argument only valid if doing pad, ' - 'backfill or nearest reindexing') - - indexer = self._engine.get_indexer(target._values) - - return com._ensure_platform_int(indexer) - - def 
_convert_tolerance(self, tolerance): - # override this method on subclasses - return tolerance - - def _get_fill_indexer(self, target, method, limit=None, tolerance=None): - if self.is_monotonic_increasing and target.is_monotonic_increasing: - method = (self._engine.get_pad_indexer if method == 'pad' else - self._engine.get_backfill_indexer) - indexer = method(target._values, limit) - else: - indexer = self._get_fill_indexer_searchsorted(target, method, - limit) - if tolerance is not None: - indexer = self._filter_indexer_tolerance(target._values, indexer, - tolerance) - return indexer - - def _get_fill_indexer_searchsorted(self, target, method, limit=None): - """ - Fallback pad/backfill get_indexer that works for monotonic decreasing - indexes and non-monotonic targets - """ - if limit is not None: - raise ValueError('limit argument for %r method only well-defined ' - 'if index and target are monotonic' % method) - - side = 'left' if method == 'pad' else 'right' - target = np.asarray(target) - - # find exact matches first (this simplifies the algorithm) - indexer = self.get_indexer(target) - nonexact = (indexer == -1) - indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], - side) - if side == 'left': - # searchsorted returns "indices into a sorted array such that, - # if the corresponding elements in v were inserted before the - # indices, the order of a would be preserved". - # Thus, we need to subtract 1 to find values to the left. - indexer[nonexact] -= 1 - # This also mapped not found values (values of 0 from - # np.searchsorted) to -1, which conveniently is also our - # sentinel for missing values - else: - # Mark indices to the right of the largest value as not found - indexer[indexer == len(self)] = -1 - return indexer - - def _get_nearest_indexer(self, target, limit, tolerance): - """ - Get the indexer for the nearest index labels; requires an index with - values that can be subtracted from each other (e.g., not strings or - tuples). 
- """ - left_indexer = self.get_indexer(target, 'pad', limit=limit) - right_indexer = self.get_indexer(target, 'backfill', limit=limit) - - target = np.asarray(target) - left_distances = abs(self.values[left_indexer] - target) - right_distances = abs(self.values[right_indexer] - target) - - op = operator.lt if self.is_monotonic_increasing else operator.le - indexer = np.where(op(left_distances, right_distances) | - (right_indexer == -1), left_indexer, right_indexer) - if tolerance is not None: - indexer = self._filter_indexer_tolerance(target, indexer, - tolerance) - return indexer - - def _filter_indexer_tolerance(self, target, indexer, tolerance): - distance = abs(self.values[indexer] - target) - indexer = np.where(distance <= tolerance, indexer, -1) - return indexer - - def get_indexer_non_unique(self, target): - """ return an indexer suitable for taking from a non unique index - return the labels in the same order as the target, and - return a missing indexer into the target (missing are marked as -1 - in the indexer); target must be an iterable """ - target = _ensure_index(target) - pself, ptarget = self._possibly_promote(target) - if pself is not self or ptarget is not target: - return pself.get_indexer_non_unique(ptarget) - - if self.is_all_dates: - self = Index(self.asi8) - tgt_values = target.asi8 - else: - tgt_values = target._values - - indexer, missing = self._engine.get_indexer_non_unique(tgt_values) - return Index(indexer), missing - - def get_indexer_for(self, target, **kwargs): - """ guaranteed return of an indexer even when non-unique """ - if self.is_unique: - return self.get_indexer(target, **kwargs) - indexer, _ = self.get_indexer_non_unique(target, **kwargs) - return indexer - - def _possibly_promote(self, other): - # A hack, but it works - from pandas.tseries.index import DatetimeIndex - if self.inferred_type == 'date' and isinstance(other, DatetimeIndex): - return DatetimeIndex(self), other - elif self.inferred_type == 'boolean': - if not 
is_object_dtype(self.dtype): - return self.astype('object'), other.astype('object') - return self, other - - def groupby(self, to_groupby): - """ - Group the index labels by a given array of values. - - Parameters - ---------- - to_groupby : array - Values used to determine the groups. - - Returns - ------- - groups : dict - {group name -> group labels} - - """ - return self._groupby(self.values, _values_from_object(to_groupby)) - - def map(self, mapper): - return self._arrmap(self.values, mapper) - - def isin(self, values, level=None): - """ - Compute boolean array of whether each index value is found in the - passed set of values. - - Parameters - ---------- - values : set or sequence of values - Sought values. - level : str or int, optional - Name or position of the index level to use (if the index is a - MultiIndex). - - Notes - ----- - If `level` is specified: - - - if it is the name of one *and only one* index level, use that level; - - otherwise it should be a number indicating level position. 
- - Returns - ------- - is_contained : ndarray (boolean dtype) - - """ - if level is not None: - self._validate_index_level(level) - return algorithms.isin(np.array(self), values) - - def _can_reindex(self, indexer): - """ - *this is an internal non-public method* - - Check if we are allowing reindexing with this particular indexer - - Parameters - ---------- - indexer : an integer indexer - - Raises - ------ - ValueError if its a duplicate axis - """ - - # trying to reindex on an axis with duplicates - if not self.is_unique and len(indexer): - raise ValueError("cannot reindex from a duplicate axis") - - def reindex(self, target, method=None, level=None, limit=None, - tolerance=None): - """ - Create index with target's values (move/add/delete values as necessary) - - Parameters - ---------- - target : an iterable - - Returns - ------- - new_index : pd.Index - Resulting index - indexer : np.ndarray or None - Indices of output values in original index - - """ - # GH6552: preserve names when reindexing to non-named target - # (i.e. neither Index nor Series). - preserve_names = not hasattr(target, 'name') - - # GH7774: preserve dtype/tz if target is empty and not an Index. 
- target = _ensure_has_len(target) # target may be an iterator - - if not isinstance(target, Index) and len(target) == 0: - attrs = self._get_attributes_dict() - attrs.pop('freq', None) # don't preserve freq - target = self._simple_new(None, dtype=self.dtype, **attrs) - else: - target = _ensure_index(target) - - if level is not None: - if method is not None: - raise TypeError('Fill method not supported if level passed') - _, indexer, _ = self._join_level(target, level, how='right', - return_indexers=True) - else: - if self.equals(target): - indexer = None - else: - if self.is_unique: - indexer = self.get_indexer(target, method=method, - limit=limit, - tolerance=tolerance) - else: - if method is not None or limit is not None: - raise ValueError("cannot reindex a non-unique index " - "with a method or limit") - indexer, missing = self.get_indexer_non_unique(target) - - if preserve_names and target.nlevels == 1 and target.name != self.name: - target = target.copy() - target.name = self.name - - return target, indexer - - def _reindex_non_unique(self, target): - """ - *this is an internal non-public method* - - Create a new index with target's values (move/add/delete values as - necessary) use with non-unique Index and a possibly non-unique target - - Parameters - ---------- - target : an iterable - - Returns - ------- - new_index : pd.Index - Resulting index - indexer : np.ndarray or None - Indices of output values in original index - - """ - - target = _ensure_index(target) - indexer, missing = self.get_indexer_non_unique(target) - check = indexer != -1 - new_labels = self.take(indexer[check]) - new_indexer = None - - if len(missing): - l = np.arange(len(indexer)) - - missing = com._ensure_platform_int(missing) - missing_labels = target.take(missing) - missing_indexer = _ensure_int64(l[~check]) - cur_labels = self.take(indexer[check])._values - cur_indexer = _ensure_int64(l[check]) - - new_labels = np.empty(tuple([len(indexer)]), dtype=object) - 
new_labels[cur_indexer] = cur_labels - new_labels[missing_indexer] = missing_labels - - # a unique indexer - if target.is_unique: - - # see GH5553, make sure we use the right indexer - new_indexer = np.arange(len(indexer)) - new_indexer[cur_indexer] = np.arange(len(cur_labels)) - new_indexer[missing_indexer] = -1 - - # we have a non_unique selector, need to use the original - # indexer here - else: - - # need to retake to have the same size as the indexer - indexer = indexer._values - indexer[~check] = 0 - - # reset the new indexer to account for the new size - new_indexer = np.arange(len(self.take(indexer))) - new_indexer[~check] = -1 - - new_index = self._shallow_copy_with_infer(new_labels, freq=None) - return new_index, indexer, new_indexer - - def join(self, other, how='left', level=None, return_indexers=False): - """ - *this is an internal non-public method* - - Compute join_index and indexers to conform data - structures to the new index. - - Parameters - ---------- - other : Index - how : {'left', 'right', 'inner', 'outer'} - level : int or level name, default None - return_indexers : boolean, default False - - Returns - ------- - join_index, (left_indexer, right_indexer) - """ - self_is_mi = isinstance(self, MultiIndex) - other_is_mi = isinstance(other, MultiIndex) - - # try to figure out the join level - # GH3662 - if level is None and (self_is_mi or other_is_mi): - - # have the same levels/names so a simple join - if self.names == other.names: - pass - else: - return self._join_multi(other, how=how, - return_indexers=return_indexers) - - # join on the level - if level is not None and (self_is_mi or other_is_mi): - return self._join_level(other, level, how=how, - return_indexers=return_indexers) - - other = _ensure_index(other) - - if len(other) == 0 and how in ('left', 'outer'): - join_index = self._shallow_copy() - if return_indexers: - rindexer = np.repeat(-1, len(join_index)) - return join_index, None, rindexer - else: - return join_index - - if 
len(self) == 0 and how in ('right', 'outer'): - join_index = other._shallow_copy() - if return_indexers: - lindexer = np.repeat(-1, len(join_index)) - return join_index, lindexer, None - else: - return join_index - - if self._join_precedence < other._join_precedence: - how = {'right': 'left', 'left': 'right'}.get(how, how) - result = other.join(self, how=how, level=level, - return_indexers=return_indexers) - if return_indexers: - x, y, z = result - result = x, z, y - return result - - if not is_dtype_equal(self.dtype, other.dtype): - this = self.astype('O') - other = other.astype('O') - return this.join(other, how=how, return_indexers=return_indexers) - - _validate_join_method(how) - - if not self.is_unique and not other.is_unique: - return self._join_non_unique(other, how=how, - return_indexers=return_indexers) - elif not self.is_unique or not other.is_unique: - if self.is_monotonic and other.is_monotonic: - return self._join_monotonic(other, how=how, - return_indexers=return_indexers) - else: - return self._join_non_unique(other, how=how, - return_indexers=return_indexers) - elif self.is_monotonic and other.is_monotonic: - try: - return self._join_monotonic(other, how=how, - return_indexers=return_indexers) - except TypeError: - pass - - if how == 'left': - join_index = self - elif how == 'right': - join_index = other - elif how == 'inner': - join_index = self.intersection(other) - elif how == 'outer': - join_index = self.union(other) - - if return_indexers: - if join_index is self: - lindexer = None - else: - lindexer = self.get_indexer(join_index) - if join_index is other: - rindexer = None - else: - rindexer = other.get_indexer(join_index) - return join_index, lindexer, rindexer - else: - return join_index - - def _join_multi(self, other, how, return_indexers=True): - - self_is_mi = isinstance(self, MultiIndex) - other_is_mi = isinstance(other, MultiIndex) - - # figure out join names - self_names = [n for n in self.names if n is not None] - other_names = [n 
for n in other.names if n is not None] - overlap = list(set(self_names) & set(other_names)) - - # need at least 1 in common, but not more than 1 - if not len(overlap): - raise ValueError("cannot join with no level specified and no " - "overlapping names") - if len(overlap) > 1: - raise NotImplementedError("merging with more than one level " - "overlap on a multi-index is not " - "implemented") - jl = overlap[0] - - # make the indices into mi's that match - if not (self_is_mi and other_is_mi): - - flip_order = False - if self_is_mi: - self, other = other, self - flip_order = True - # flip if join method is right or left - how = {'right': 'left', 'left': 'right'}.get(how, how) - - level = other.names.index(jl) - result = self._join_level(other, level, how=how, - return_indexers=return_indexers) - - if flip_order: - if isinstance(result, tuple): - return result[0], result[2], result[1] - return result - - # 2 multi-indexes - raise NotImplementedError("merging with both multi-indexes is not " - "implemented") - - def _join_non_unique(self, other, how='left', return_indexers=False): - from pandas.tools.merge import _get_join_indexers - - left_idx, right_idx = _get_join_indexers([self.values], - [other._values], how=how, - sort=True) - - left_idx = com._ensure_platform_int(left_idx) - right_idx = com._ensure_platform_int(right_idx) - - join_index = self.values.take(left_idx) - mask = left_idx == -1 - np.putmask(join_index, mask, other._values.take(right_idx)) - - join_index = self._wrap_joined_index(join_index, other) - - if return_indexers: - return join_index, left_idx, right_idx - else: - return join_index - - def _join_level(self, other, level, how='left', return_indexers=False, - keep_order=True): - """ - The join method *only* affects the level of the resulting - MultiIndex. Otherwise it just exactly aligns the Index data to the - labels of the level in the MultiIndex. 
If `keep_order` == True, the - order of the data indexed by the MultiIndex will not be changed; - otherwise, it will tie out with `other`. - """ - from pandas.algos import groupsort_indexer - - def _get_leaf_sorter(labels): - ''' - returns sorter for the inner most level while preserving the - order of higher levels - ''' - if labels[0].size == 0: - return np.empty(0, dtype='int64') - - if len(labels) == 1: - lab = _ensure_int64(labels[0]) - sorter, _ = groupsort_indexer(lab, 1 + lab.max()) - return sorter - - # find indexers of begining of each set of - # same-key labels w.r.t all but last level - tic = labels[0][:-1] != labels[0][1:] - for lab in labels[1:-1]: - tic |= lab[:-1] != lab[1:] - - starts = np.hstack(([True], tic, [True])).nonzero()[0] - lab = _ensure_int64(labels[-1]) - return lib.get_level_sorter(lab, _ensure_int64(starts)) - - if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): - raise TypeError('Join on level between two MultiIndex objects ' - 'is ambiguous') - - left, right = self, other - - flip_order = not isinstance(self, MultiIndex) - if flip_order: - left, right = right, left - how = {'right': 'left', 'left': 'right'}.get(how, how) - - level = left._get_level_number(level) - old_level = left.levels[level] - - if not right.is_unique: - raise NotImplementedError('Index._join_level on non-unique index ' - 'is not implemented') - - new_level, left_lev_indexer, right_lev_indexer = \ - old_level.join(right, how=how, return_indexers=True) - - if left_lev_indexer is None: - if keep_order or len(left) == 0: - left_indexer = None - join_index = left - else: # sort the leaves - left_indexer = _get_leaf_sorter(left.labels[:level + 1]) - join_index = left[left_indexer] - - else: - left_lev_indexer = _ensure_int64(left_lev_indexer) - rev_indexer = lib.get_reverse_indexer(left_lev_indexer, - len(old_level)) - - new_lev_labels = com.take_nd(rev_indexer, left.labels[level], - allow_fill=False) - - new_labels = list(left.labels) - 
new_labels[level] = new_lev_labels - - new_levels = list(left.levels) - new_levels[level] = new_level - - if keep_order: # just drop missing values. o.w. keep order - left_indexer = np.arange(len(left)) - mask = new_lev_labels != -1 - if not mask.all(): - new_labels = [lab[mask] for lab in new_labels] - left_indexer = left_indexer[mask] - - else: # tie out the order with other - if level == 0: # outer most level, take the fast route - ngroups = 1 + new_lev_labels.max() - left_indexer, counts = groupsort_indexer(new_lev_labels, - ngroups) - # missing values are placed first; drop them! - left_indexer = left_indexer[counts[0]:] - new_labels = [lab[left_indexer] for lab in new_labels] - - else: # sort the leaves - mask = new_lev_labels != -1 - mask_all = mask.all() - if not mask_all: - new_labels = [lab[mask] for lab in new_labels] - - left_indexer = _get_leaf_sorter(new_labels[:level + 1]) - new_labels = [lab[left_indexer] for lab in new_labels] - - # left_indexers are w.r.t masked frame. - # reverse to original frame! 
- if not mask_all: - left_indexer = mask.nonzero()[0][left_indexer] - - join_index = MultiIndex(levels=new_levels, labels=new_labels, - names=left.names, verify_integrity=False) - - if right_lev_indexer is not None: - right_indexer = com.take_nd(right_lev_indexer, - join_index.labels[level], - allow_fill=False) - else: - right_indexer = join_index.labels[level] - - if flip_order: - left_indexer, right_indexer = right_indexer, left_indexer - - if return_indexers: - return join_index, left_indexer, right_indexer - else: - return join_index - - def _join_monotonic(self, other, how='left', return_indexers=False): - if self.equals(other): - ret_index = other if how == 'right' else self - if return_indexers: - return ret_index, None, None - else: - return ret_index - - sv = self.values - ov = other._values - - if self.is_unique and other.is_unique: - # We can perform much better than the general case - if how == 'left': - join_index = self - lidx = None - ridx = self._left_indexer_unique(sv, ov) - elif how == 'right': - join_index = other - lidx = self._left_indexer_unique(ov, sv) - ridx = None - elif how == 'inner': - join_index, lidx, ridx = self._inner_indexer(sv, ov) - join_index = self._wrap_joined_index(join_index, other) - elif how == 'outer': - join_index, lidx, ridx = self._outer_indexer(sv, ov) - join_index = self._wrap_joined_index(join_index, other) - else: - if how == 'left': - join_index, lidx, ridx = self._left_indexer(sv, ov) - elif how == 'right': - join_index, ridx, lidx = self._left_indexer(ov, sv) - elif how == 'inner': - join_index, lidx, ridx = self._inner_indexer(sv, ov) - elif how == 'outer': - join_index, lidx, ridx = self._outer_indexer(sv, ov) - join_index = self._wrap_joined_index(join_index, other) - - if return_indexers: - return join_index, lidx, ridx - else: - return join_index - - def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None - return Index(joined, name=name) - - def 
slice_indexer(self, start=None, end=None, step=None, kind=None): - """ - For an ordered Index, compute the slice indexer for input labels and - step - - Parameters - ---------- - start : label, default None - If None, defaults to the beginning - end : label, default None - If None, defaults to the end - step : int, default None - kind : string, default None - - Returns - ------- - indexer : ndarray or slice - - Notes - ----- - This function assumes that the data is sorted, so use at your own peril - """ - start_slice, end_slice = self.slice_locs(start, end, step=step, - kind=kind) - - # return a slice - if not lib.isscalar(start_slice): - raise AssertionError("Start slice bound is non-scalar") - if not lib.isscalar(end_slice): - raise AssertionError("End slice bound is non-scalar") - - return slice(start_slice, end_slice, step) - - def _maybe_cast_slice_bound(self, label, side, kind): - """ - This function should be overloaded in subclasses that allow non-trivial - casting on label-slice bounds, e.g. datetime-like indices allowing - strings containing formatted datetimes. - - Parameters - ---------- - label : object - side : {'left', 'right'} - kind : string / None - - Returns - ------- - label : object - - Notes - ----- - Value of `side` parameter should be validated in caller. - - """ - - # We are a plain index here (sub-class override this method if they - # wish to have special treatment for floats/ints, e.g. 
Float64Index and - # datetimelike Indexes - # reject them - if is_float(label): - self._invalid_indexer('slice', label) - - # we are trying to find integer bounds on a non-integer based index - # this is rejected (generally .loc gets you here) - elif is_integer(label): - self._invalid_indexer('slice', label) - - return label - - def _searchsorted_monotonic(self, label, side='left'): - if self.is_monotonic_increasing: - return self.searchsorted(label, side=side) - elif self.is_monotonic_decreasing: - # np.searchsorted expects ascending sort order, have to reverse - # everything for it to work (element ordering, search side and - # resulting value). - pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'right') - return len(self) - pos - - raise ValueError('index must be monotonic increasing or decreasing') - - def get_slice_bound(self, label, side, kind): - """ - Calculate slice bound that corresponds to given label. - - Returns leftmost (one-past-the-rightmost if ``side=='right'``) position - of given label. - - Parameters - ---------- - label : object - side : {'left', 'right'} - kind : string / None, the type of indexer - - """ - if side not in ('left', 'right'): - raise ValueError("Invalid value for side kwarg," - " must be either 'left' or 'right': %s" % - (side, )) - - original_label = label - - # For datetime indices label may be a string that has to be converted - # to datetime boundary according to its resolution. - label = self._maybe_cast_slice_bound(label, side, kind) - - # we need to look up the label - try: - slc = self.get_loc(label) - except KeyError as err: - try: - return self._searchsorted_monotonic(label, side) - except ValueError: - # raise the original KeyError - raise err - - if isinstance(slc, np.ndarray): - # get_loc may return a boolean array or an array of indices, which - # is OK as long as they are representable by a slice. 
- if is_bool_dtype(slc): - slc = lib.maybe_booleans_to_slice(slc.view('u1')) - else: - slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self)) - if isinstance(slc, np.ndarray): - raise KeyError("Cannot get %s slice bound for non-unique " - "label: %r" % (side, original_label)) - - if isinstance(slc, slice): - if side == 'left': - return slc.start - else: - return slc.stop - else: - if side == 'right': - return slc + 1 - else: - return slc - - def slice_locs(self, start=None, end=None, step=None, kind=None): - """ - Compute slice locations for input labels. - - Parameters - ---------- - start : label, default None - If None, defaults to the beginning - end : label, default None - If None, defaults to the end - step : int, defaults None - If None, defaults to 1 - kind : string, defaults None - - Returns - ------- - start, end : int - - """ - inc = (step is None or step >= 0) - - if not inc: - # If it's a reverse slice, temporarily swap bounds. - start, end = end, start - - start_slice = None - if start is not None: - start_slice = self.get_slice_bound(start, 'left', kind) - if start_slice is None: - start_slice = 0 - - end_slice = None - if end is not None: - end_slice = self.get_slice_bound(end, 'right', kind) - if end_slice is None: - end_slice = len(self) - - if not inc: - # Bounds at this moment are swapped, swap them back and shift by 1. - # - # slice_locs('B', 'A', step=-1): s='B', e='A' - # - # s='A' e='B' - # AFTER SWAP: | | - # v ------------------> V - # ----------------------------------- - # | | |A|A|A|A| | | | | |B|B| | | | | - # ----------------------------------- - # ^ <------------------ ^ - # SHOULD BE: | | - # end=s-1 start=e-1 - # - end_slice, start_slice = start_slice - 1, end_slice - 1 - - # i == -1 triggers ``len(self) + i`` selection that points to the - # last element, not before-the-first one, subtracting len(self) - # compensates that. 
- if end_slice == -1: - end_slice -= len(self) - if start_slice == -1: - start_slice -= len(self) - - return start_slice, end_slice - - def delete(self, loc): - """ - Make new Index with passed location(-s) deleted - - Returns - ------- - new_index : Index - """ - return self._shallow_copy(np.delete(self._data, loc)) - - def insert(self, loc, item): - """ - Make new Index inserting new item at location. Follows - Python list.append semantics for negative values - - Parameters - ---------- - loc : int - item : object - - Returns - ------- - new_index : Index - """ - _self = np.asarray(self) - item = self._coerce_scalar_to_index(item)._values - - idx = np.concatenate((_self[:loc], item, _self[loc:])) - return self._shallow_copy_with_infer(idx) - - def drop(self, labels, errors='raise'): - """ - Make new Index with passed list of labels deleted - - Parameters - ---------- - labels : array-like - errors : {'ignore', 'raise'}, default 'raise' - If 'ignore', suppress error and existing labels are dropped. - - Returns - ------- - dropped : Index - """ - labels = com._index_labels_to_array(labels) - indexer = self.get_indexer(labels) - mask = indexer == -1 - if mask.any(): - if errors != 'ignore': - raise ValueError('labels %s not contained in axis' % - labels[mask]) - indexer = indexer[~mask] - return self.delete(indexer) - - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) - @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) - def drop_duplicates(self, keep='first'): - return super(Index, self).drop_duplicates(keep=keep) - - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) - @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) - def duplicated(self, keep='first'): - return super(Index, self).duplicated(keep=keep) - - _index_shared_docs['fillna'] = """ - Fill NA/NaN values with the specified value - - Parameters - ---------- - value : scalar - Scalar value to use to fill holes (e.g. 
0). - This value cannot be a list-likes. - downcast : dict, default is None - a dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible) - - Returns - ------- - filled : Index - """ - - @Appender(_index_shared_docs['fillna']) - def fillna(self, value=None, downcast=None): - self._assert_can_do_op(value) - if self.hasnans: - result = self.putmask(self._isnan, value) - if downcast is None: - # no need to care metadata other than name - # because it can't have freq if - return Index(result, name=self.name) - return self._shallow_copy() - - def _evaluate_with_timedelta_like(self, other, op, opstr): - raise TypeError("can only perform ops with timedelta like values") - - def _evaluate_with_datetime_like(self, other, op, opstr): - raise TypeError("can only perform ops with datetime like values") - - @classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - - def _make_compare(op): - def _evaluate_compare(self, other): - if isinstance(other, (np.ndarray, Index, ABCSeries)): - if other.ndim > 0 and len(self) != len(other): - raise ValueError('Lengths must match to compare') - func = getattr(self.values, op) - result = func(np.asarray(other)) - - # technically we could support bool dtyped Index - # for now just return the indexing array directly - if is_bool_dtype(result): - return result - try: - return Index(result) - except TypeError: - return result - - return _evaluate_compare - - cls.__eq__ = _make_compare('__eq__') - cls.__ne__ = _make_compare('__ne__') - cls.__lt__ = _make_compare('__lt__') - cls.__gt__ = _make_compare('__gt__') - cls.__le__ = _make_compare('__le__') - cls.__ge__ = _make_compare('__ge__') - - @classmethod - def _add_numericlike_set_methods_disabled(cls): - """ add in the numeric set-like methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot 
perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - - cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa - cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa - - @classmethod - def _add_numeric_methods_disabled(cls): - """ add in numeric methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - - cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') - cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') - cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') - cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__') - if not compat.PY3: - cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__') - cls.__neg__ = _make_invalid_op('__neg__') - cls.__pos__ = _make_invalid_op('__pos__') - cls.__abs__ = _make_invalid_op('__abs__') - cls.__inv__ = _make_invalid_op('__inv__') - - def _maybe_update_attributes(self, attrs): - """ Update Index attributes (e.g. freq) depending on op """ - return attrs - - def _validate_for_numeric_unaryop(self, op, opstr): - """ validate if we can perform a numeric unary operation """ - - if not self._is_numeric_dtype: - raise TypeError("cannot evaluate a numeric op " - "{opstr} for type: {typ}".format( - opstr=opstr, - typ=type(self)) - ) - - def _validate_for_numeric_binop(self, other, op, opstr): - """ - return valid other, evaluate or raise TypeError - if we are not of the appropriate type - - internal method called by ops - """ - from pandas.tseries.offsets import DateOffset - - # if we are an inheritor of numeric, - # but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) - if not self._is_numeric_dtype: - raise TypeError("cannot evaluate a numeric op {opstr} " - "for type: {typ}".format( - opstr=opstr, - typ=type(self)) - ) - - if isinstance(other, Index): - if not other._is_numeric_dtype: - raise TypeError("cannot evaluate a numeric op " - "{opstr} with type: {typ}".format( - opstr=type(self), - typ=type(other)) - ) - elif isinstance(other, np.ndarray) and not other.ndim: - other = other.item() - - if isinstance(other, (Index, ABCSeries, np.ndarray)): - if len(self) != len(other): - raise ValueError("cannot evaluate a numeric op with " - "unequal lengths") - other = _values_from_object(other) - if other.dtype.kind not in ['f', 'i']: - raise TypeError("cannot evaluate a numeric op " - "with a non-numeric dtype") - elif isinstance(other, (DateOffset, np.timedelta64, - Timedelta, datetime.timedelta)): - # higher up to handle - pass - elif isinstance(other, (Timestamp, np.datetime64)): - # higher up to handle - pass - else: - if not (is_float(other) or is_integer(other)): - raise TypeError("can only perform ops with scalar values") - - return other - - @classmethod - def _add_numeric_methods_binary(cls): - """ add in numeric methods """ - - def _make_evaluate_binop(op, opstr, reversed=False): - def _evaluate_numeric_binop(self, other): - - from pandas.tseries.offsets import DateOffset - other = self._validate_for_numeric_binop(other, op, opstr) - - # handle time-based others - if isinstance(other, (DateOffset, np.timedelta64, - Timedelta, datetime.timedelta)): - return self._evaluate_with_timedelta_like(other, op, opstr) - elif isinstance(other, (Timestamp, np.datetime64)): - return self._evaluate_with_datetime_like(other, op, opstr) - - # if we are a reversed non-communative op - values = self.values - if reversed: - values, other = other, values - - attrs = self._get_attributes_dict() - attrs = self._maybe_update_attributes(attrs) - return Index(op(values, other), **attrs) - - return _evaluate_numeric_binop 
- - cls.__add__ = cls.__radd__ = _make_evaluate_binop( - operator.add, '__add__') - cls.__sub__ = _make_evaluate_binop( - operator.sub, '__sub__') - cls.__rsub__ = _make_evaluate_binop( - operator.sub, '__sub__', reversed=True) - cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( - operator.mul, '__mul__') - cls.__pow__ = cls.__rpow__ = _make_evaluate_binop( - operator.pow, '__pow__') - cls.__mod__ = _make_evaluate_binop( - operator.mod, '__mod__') - cls.__floordiv__ = _make_evaluate_binop( - operator.floordiv, '__floordiv__') - cls.__rfloordiv__ = _make_evaluate_binop( - operator.floordiv, '__floordiv__', reversed=True) - cls.__truediv__ = _make_evaluate_binop( - operator.truediv, '__truediv__') - cls.__rtruediv__ = _make_evaluate_binop( - operator.truediv, '__truediv__', reversed=True) - if not compat.PY3: - cls.__div__ = _make_evaluate_binop( - operator.div, '__div__') - cls.__rdiv__ = _make_evaluate_binop( - operator.div, '__div__', reversed=True) - - @classmethod - def _add_numeric_methods_unary(cls): - """ add in numeric unary methods """ - - def _make_evaluate_unary(op, opstr): - - def _evaluate_numeric_unary(self): - - self._validate_for_numeric_unaryop(op, opstr) - attrs = self._get_attributes_dict() - attrs = self._maybe_update_attributes(attrs) - return Index(op(self.values), **attrs) - - return _evaluate_numeric_unary - - cls.__neg__ = _make_evaluate_unary(lambda x: -x, '__neg__') - cls.__pos__ = _make_evaluate_unary(lambda x: x, '__pos__') - cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__') - cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__') - - @classmethod - def _add_numeric_methods(cls): - cls._add_numeric_methods_unary() - cls._add_numeric_methods_binary() - - @classmethod - def _add_logical_methods(cls): - """ add in logical methods """ - - _doc = """ - - %(desc)s - - Parameters - ---------- - All arguments to numpy.%(outname)s are accepted. 
- - Returns - ------- - %(outname)s : bool or array_like (if axis is specified) - A single element array_like may be converted to bool.""" - - def _make_logical_function(name, desc, f): - @Substitution(outname=name, desc=desc) - @Appender(_doc) - def logical_func(self, *args, **kwargs): - result = f(self.values) - if (isinstance(result, (np.ndarray, ABCSeries, Index)) and - result.ndim == 0): - # return NumPy type - return result.dtype.type(result.item()) - else: # pragma: no cover - return result - - logical_func.__name__ = name - return logical_func - - cls.all = _make_logical_function('all', 'Return whether all elements ' - 'are True', - np.all) - cls.any = _make_logical_function('any', - 'Return whether any element is True', - np.any) - - @classmethod - def _add_logical_methods_disabled(cls): - """ add in logical methods to disable """ - - def _make_invalid_op(name): - def invalid_op(self, other=None): - raise TypeError("cannot perform {name} with this index type: " - "{typ}".format(name=name, typ=type(self))) - - invalid_op.__name__ = name - return invalid_op - - cls.all = _make_invalid_op('all') - cls.any = _make_invalid_op('any') - - -Index._add_numeric_methods_disabled() -Index._add_logical_methods() -Index._add_comparison_methods() - - -class CategoricalIndex(Index, PandasDelegate): - """ - - Immutable Index implementing an ordered, sliceable set. CategoricalIndex - represents a sparsely populated Index with an underlying Categorical. - - .. 
versionadded:: 0.16.1 - - Parameters - ---------- - data : array-like or Categorical, (1-dimensional) - categories : optional, array-like - categories for the CategoricalIndex - ordered : boolean, - designating if the categories are ordered - copy : bool - Make a copy of input ndarray - name : object - Name to be stored in the index - - """ - - _typ = 'categoricalindex' - _engine_type = _index.Int64Engine - _attributes = ['name'] - - def __new__(cls, data=None, categories=None, ordered=None, dtype=None, - copy=False, name=None, fastpath=False, **kwargs): - - if fastpath: - return cls._simple_new(data, name=name) - - if isinstance(data, ABCCategorical): - data = cls._create_categorical(cls, data, categories, ordered) - elif isinstance(data, CategoricalIndex): - data = data._data - data = cls._create_categorical(cls, data, categories, ordered) - else: - - # don't allow scalars - # if data is None, then categories must be provided - if lib.isscalar(data): - if data is not None or categories is None: - cls._scalar_data_error(data) - data = [] - data = cls._create_categorical(cls, data, categories, ordered) - - if copy: - data = data.copy() - - return cls._simple_new(data, name=name) - - def _create_from_codes(self, codes, categories=None, ordered=None, - name=None): - """ - *this is an internal non-public method* - - create the correct categorical from codes - - Parameters - ---------- - codes : new codes - categories : optional categories, defaults to existing - ordered : optional ordered attribute, defaults to existing - name : optional name attribute, defaults to existing - - Returns - ------- - CategoricalIndex - """ - - from pandas.core.categorical import Categorical - if categories is None: - categories = self.categories - if ordered is None: - ordered = self.ordered - if name is None: - name = self.name - cat = Categorical.from_codes(codes, categories=categories, - ordered=self.ordered) - return CategoricalIndex(cat, name=name) - - @staticmethod - def 
_create_categorical(self, data, categories=None, ordered=None): - """ - *this is an internal non-public method* - - create the correct categorical from data and the properties - - Parameters - ---------- - data : data for new Categorical - categories : optional categories, defaults to existing - ordered : optional ordered attribute, defaults to existing - - Returns - ------- - Categorical - """ - - if not isinstance(data, ABCCategorical): - from pandas.core.categorical import Categorical - data = Categorical(data, categories=categories, ordered=ordered) - else: - if categories is not None: - data = data.set_categories(categories) - if ordered is not None: - data = data.set_ordered(ordered) - return data - - @classmethod - def _simple_new(cls, values, name=None, categories=None, ordered=None, - **kwargs): - result = object.__new__(cls) - - values = cls._create_categorical(cls, values, categories, ordered) - result._data = values - result.name = name - for k, v in compat.iteritems(kwargs): - setattr(result, k, v) - - result._reset_identity() - return result - - def _is_dtype_compat(self, other): - """ - *this is an internal non-public method* - - provide a comparison between the dtype of self and other (coercing if - needed) - - Raises - ------ - TypeError if the dtypes are not compatible - """ - - if is_categorical_dtype(other): - if isinstance(other, CategoricalIndex): - other = other._values - if not other.is_dtype_equal(self): - raise TypeError("categories must match existing categories " - "when appending") - else: - values = other - if not is_list_like(values): - values = [values] - other = CategoricalIndex(self._create_categorical( - self, other, categories=self.categories, ordered=self.ordered)) - if not other.isin(values).all(): - raise TypeError("cannot append a non-category item to a " - "CategoricalIndex") - - return other - - def equals(self, other): - """ - Determines if two CategorialIndex objects contain the same elements. 
- """ - if self.is_(other): - return True - - try: - other = self._is_dtype_compat(other) - return array_equivalent(self._data, other) - except (TypeError, ValueError): - pass - - return False - - @property - def _formatter_func(self): - return self.categories._formatter_func - - def _format_attrs(self): - """ - Return a list of tuples of the (attr,formatted_value) - """ - max_categories = (10 if get_option("display.max_categories") == 0 else - get_option("display.max_categories")) - attrs = [('categories', default_pprint(self.categories, - max_seq_items=max_categories)), - ('ordered', self.ordered)] - if self.name is not None: - attrs.append(('name', default_pprint(self.name))) - attrs.append(('dtype', "'%s'" % self.dtype)) - max_seq_items = get_option('display.max_seq_items') or len(self) - if len(self) > max_seq_items: - attrs.append(('length', len(self))) - return attrs - - @property - def inferred_type(self): - return 'categorical' - - @property - def values(self): - """ return the underlying data, which is a Categorical """ - return self._data - - def get_values(self): - """ return the underlying data as an ndarray """ - return self._data.get_values() - - @property - def codes(self): - return self._data.codes - - @property - def categories(self): - return self._data.categories - - @property - def ordered(self): - return self._data.ordered - - def __contains__(self, key): - hash(key) - return key in self.values - - def __array__(self, dtype=None): - """ the array interface, return my values """ - return np.array(self._data, dtype=dtype) - - @cache_readonly - def _isnan(self): - """ return if each value is nan""" - return self._data.codes == -1 - - @Appender(_index_shared_docs['fillna']) - def fillna(self, value, downcast=None): - self._assert_can_do_op(value) - return CategoricalIndex(self._data.fillna(value), name=self.name) - - def argsort(self, *args, **kwargs): - return self.values.argsort(*args, **kwargs) - - @cache_readonly - def _engine(self): - - # we 
are going to look things up with the codes themselves - return self._engine_type(lambda: self.codes.astype('i8'), len(self)) - - @cache_readonly - def is_unique(self): - return not self.duplicated().any() - - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) - @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) - def duplicated(self, keep='first'): - from pandas.hashtable import duplicated_int64 - return duplicated_int64(self.codes.astype('i8'), keep) - - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self.astype('object') - - def get_loc(self, key, method=None): - """ - Get integer location for requested label - - Parameters - ---------- - key : label - method : {None} - * default: exact matches only. - - Returns - ------- - loc : int if unique index, possibly slice or mask if not - """ - codes = self.categories.get_loc(key) - if (codes == -1): - raise KeyError(key) - indexer, _ = self._engine.get_indexer_non_unique(np.array([codes])) - if (indexer == -1).any(): - raise KeyError(key) - - return indexer - - def _can_reindex(self, indexer): - """ always allow reindexing """ - pass - - def reindex(self, target, method=None, level=None, limit=None, - tolerance=None): - """ - Create index with target's values (move/add/delete values as necessary) - - Returns - ------- - new_index : pd.Index - Resulting index - indexer : np.ndarray or None - Indices of output values in original index - - """ - - if method is not None: - raise NotImplementedError("argument method is not implemented for " - "CategoricalIndex.reindex") - if level is not None: - raise NotImplementedError("argument level is not implemented for " - "CategoricalIndex.reindex") - if limit is not None: - raise NotImplementedError("argument limit is not implemented for " - "CategoricalIndex.reindex") - - target = _ensure_index(target) - - if not is_categorical_dtype(target) and not target.is_unique: - raise ValueError("cannot 
reindex with a non-unique indexer") - - indexer, missing = self.get_indexer_non_unique(np.array(target)) - new_target = self.take(indexer) - - # filling in missing if needed - if len(missing): - cats = self.categories.get_indexer(target) - - if (cats == -1).any(): - # coerce to a regular index here! - result = Index(np.array(self), name=self.name) - new_target, indexer, _ = result._reindex_non_unique( - np.array(target)) - - else: - - codes = new_target.codes.copy() - codes[indexer == -1] = cats[missing] - new_target = self._create_from_codes(codes) - - # we always want to return an Index type here - # to be consistent with .reindex for other index types (e.g. they don't - # coerce based on the actual values, only on the dtype) - # unless we had an inital Categorical to begin with - # in which case we are going to conform to the passed Categorical - new_target = np.asarray(new_target) - if is_categorical_dtype(target): - new_target = target._shallow_copy(new_target, name=self.name) - else: - new_target = Index(new_target, name=self.name) - - return new_target, indexer - - def _reindex_non_unique(self, target): - """ reindex from a non-unique; which CategoricalIndex's are almost - always - """ - new_target, indexer = self.reindex(target) - new_indexer = None - - check = indexer == -1 - if check.any(): - new_indexer = np.arange(len(self.take(indexer))) - new_indexer[check] = -1 - - cats = self.categories.get_indexer(target) - if not (cats == -1).any(): - # .reindex returns normal Index. Revert to CategoricalIndex if - # all targets are included in my categories - new_target = self._shallow_copy(new_target) - - return new_target, indexer, new_indexer - - def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. 
The mask determines whether labels are - found or not in the current index - - Parameters - ---------- - target : MultiIndex or Index (of tuples) - method : {'pad', 'ffill', 'backfill', 'bfill'} - pad / ffill: propagate LAST valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - - Notes - ----- - This is a low-level method and probably should be used at your own risk - - Examples - -------- - >>> indexer, mask = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - >>> new_values[-mask] = np.nan - - Returns - ------- - (indexer, mask) : (ndarray, ndarray) - """ - method = _clean_reindex_fill_method(method) - target = _ensure_index(target) - - if isinstance(target, CategoricalIndex): - target = target.categories - - if method == 'pad' or method == 'backfill': - raise NotImplementedError("method='pad' and method='backfill' not " - "implemented yet for CategoricalIndex") - elif method == 'nearest': - raise NotImplementedError("method='nearest' not implemented yet " - 'for CategoricalIndex') - else: - - codes = self.categories.get_indexer(target) - indexer, _ = self._engine.get_indexer_non_unique(codes) - - return com._ensure_platform_int(indexer) - - def get_indexer_non_unique(self, target): - """ this is the same for a CategoricalIndex for get_indexer; the API - returns the missing values as well - """ - target = _ensure_index(target) - - if isinstance(target, CategoricalIndex): - target = target.categories - - codes = self.categories.get_indexer(target) - return self._engine.get_indexer_non_unique(codes) - - def _convert_list_indexer(self, keyarr, kind=None): - """ - we are passed a list indexer. 
- Return our indexer or raise if all of the values are not included in - the categories - """ - codes = self.categories.get_indexer(keyarr) - if (codes == -1).any(): - raise KeyError("a list-indexer must only include values that are " - "in the categories") - - return None - - def take(self, indexer, axis=0, allow_fill=True, fill_value=None): - """ - For internal compatibility with numpy arrays. - - # filling must always be None/nan here - # but is passed thru internally - assert isnull(fill_value) - - See also - -------- - numpy.ndarray.take - """ - - indexer = com._ensure_platform_int(indexer) - taken = self.codes.take(indexer) - return self._create_from_codes(taken) - - def delete(self, loc): - """ - Make new Index with passed location(-s) deleted - - Returns - ------- - new_index : Index - """ - return self._create_from_codes(np.delete(self.codes, loc)) - - def insert(self, loc, item): - """ - Make new Index inserting new item at location. Follows - Python list.append semantics for negative values - - Parameters - ---------- - loc : int - item : object - - Returns - ------- - new_index : Index - - Raises - ------ - ValueError if the item is not in the categories - - """ - code = self.categories.get_indexer([item]) - if (code == -1): - raise TypeError("cannot insert an item into a CategoricalIndex " - "that is not already an existing category") - - codes = self.codes - codes = np.concatenate((codes[:loc], code, codes[loc:])) - return self._create_from_codes(codes) - - def append(self, other): - """ - Append a collection of CategoricalIndex options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : Index - - Raises - ------ - ValueError if other is not in the categories - """ - to_concat, name = self._ensure_compat_append(other) - to_concat = [self._is_dtype_compat(c) for c in to_concat] - codes = np.concatenate([c.codes for c in to_concat]) - return self._create_from_codes(codes, name=name) - - 
@classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - - def _make_compare(op): - def _evaluate_compare(self, other): - - # if we have a Categorical type, then must have the same - # categories - if isinstance(other, CategoricalIndex): - other = other._values - elif isinstance(other, Index): - other = self._create_categorical( - self, other._values, categories=self.categories, - ordered=self.ordered) - - if isinstance(other, (ABCCategorical, np.ndarray, ABCSeries)): - if len(self.values) != len(other): - raise ValueError("Lengths must match to compare") - - if isinstance(other, ABCCategorical): - if not self.values.is_dtype_equal(other): - raise TypeError("categorical index comparisions must " - "have the same categories and ordered " - "attributes") - - return getattr(self.values, op)(other) - - return _evaluate_compare - - cls.__eq__ = _make_compare('__eq__') - cls.__ne__ = _make_compare('__ne__') - cls.__lt__ = _make_compare('__lt__') - cls.__gt__ = _make_compare('__gt__') - cls.__le__ = _make_compare('__le__') - cls.__ge__ = _make_compare('__ge__') - - def _delegate_method(self, name, *args, **kwargs): - """ method delegation to the ._values """ - method = getattr(self._values, name) - if 'inplace' in kwargs: - raise ValueError("cannot use inplace with CategoricalIndex") - res = method(*args, **kwargs) - if lib.isscalar(res): - return res - return CategoricalIndex(res, name=self.name) - - @classmethod - def _add_accessors(cls): - """ add in Categorical accessor methods """ - - from pandas.core.categorical import Categorical - CategoricalIndex._add_delegate_accessors( - delegate=Categorical, accessors=["rename_categories", - "reorder_categories", - "add_categories", - "remove_categories", - "remove_unused_categories", - "set_categories", - "as_ordered", "as_unordered", - "min", "max"], - typ='method', overwrite=True) - - -CategoricalIndex._add_numericlike_set_methods_disabled() -CategoricalIndex._add_numeric_methods_disabled() 
-CategoricalIndex._add_logical_methods_disabled() -CategoricalIndex._add_comparison_methods() -CategoricalIndex._add_accessors() - - -class NumericIndex(Index): - """ - Provide numeric type operations - - This is an abstract class - - """ - _is_numeric_dtype = True - - def _maybe_cast_slice_bound(self, label, side, kind): - """ - This function should be overloaded in subclasses that allow non-trivial - casting on label-slice bounds, e.g. datetime-like indices allowing - strings containing formatted datetimes. - - Parameters - ---------- - label : object - side : {'left', 'right'} - kind : string / None - - Returns - ------- - label : object - - Notes - ----- - Value of `side` parameter should be validated in caller. - - """ - - # we are a numeric index, so we accept - # integer/floats directly - if not (is_integer(label) or is_float(label)): - self._invalid_indexer('slice', label) - - return label - - def _convert_tolerance(self, tolerance): - try: - return float(tolerance) - except ValueError: - raise ValueError('tolerance argument for %s must be numeric: %r' % - (type(self).__name__, tolerance)) - - -class Int64Index(NumericIndex): - """ - Immutable ndarray implementing an ordered, sliceable set. The basic object - storing axis labels for all pandas objects. Int64Index is a special case - of `Index` with purely integer labels. This is the default index type used - by the DataFrame and Series ctors when no explicit index is provided by the - user. 
- - Parameters - ---------- - data : array-like (1-dimensional) - dtype : NumPy dtype (default: int64) - copy : bool - Make a copy of input ndarray - name : object - Name to be stored in the index - - Notes - ----- - An Index instance can **only** contain hashable objects - """ - - _typ = 'int64index' - _groupby = _algos.groupby_int64 - _arrmap = _algos.arrmap_int64 - _left_indexer_unique = _algos.left_join_indexer_unique_int64 - _left_indexer = _algos.left_join_indexer_int64 - _inner_indexer = _algos.inner_join_indexer_int64 - _outer_indexer = _algos.outer_join_indexer_int64 - - _can_hold_na = False - - _engine_type = _index.Int64Engine - - def __new__(cls, data=None, dtype=None, copy=False, name=None, - fastpath=False, **kwargs): - - if fastpath: - return cls._simple_new(data, name=name) - - # isscalar, generators handled in coerce_to_ndarray - data = cls._coerce_to_ndarray(data) - - if issubclass(data.dtype.type, compat.string_types): - cls._string_data_error(data) - - elif issubclass(data.dtype.type, np.integer): - # don't force the upcast as we may be dealing - # with a platform int - if (dtype is None or - not issubclass(np.dtype(dtype).type, np.integer)): - dtype = np.int64 - - subarr = np.array(data, dtype=dtype, copy=copy) - else: - subarr = np.array(data, dtype=np.int64, copy=copy) - if len(data) > 0: - if (subarr != data).any(): - raise TypeError('Unsafe NumPy casting to integer, you must' - ' explicitly cast') - - return cls._simple_new(subarr, name=name) - - @property - def inferred_type(self): - return 'integer' - - @property - def asi8(self): - # do not cache or you'll create a memory leak - return self.values.view('i8') - - @property - def is_all_dates(self): - """ - Checks that all the labels are datetime objects - """ - return False - - def equals(self, other): - """ - Determines if two Index objects contain the same elements. 
- """ - if self.is_(other): - return True - - # if not isinstance(other, Int64Index): - # return False - - try: - return array_equivalent(_values_from_object(self), - _values_from_object(other)) - except TypeError: - # e.g. fails in numpy 1.6 with DatetimeIndex #1681 - return False - - def _wrap_joined_index(self, joined, other): - name = self.name if self.name == other.name else None - return Int64Index(joined, name=name) - - -Int64Index._add_numeric_methods() -Int64Index._add_logical_methods() - - -class RangeIndex(Int64Index): - - """ - Immutable Index implementing a monotonic range. RangeIndex is a - memory-saving special case of Int64Index limited to representing - monotonic ranges. - - Parameters - ---------- - start : int (default: 0) - stop : int (default: 0) - step : int (default: 1) - name : object, optional - Name to be stored in the index - copy : bool, default False - Make a copy of input if its a RangeIndex - - """ - - _typ = 'rangeindex' - _engine_type = _index.Int64Engine - - def __new__(cls, start=None, stop=None, step=None, name=None, dtype=None, - fastpath=False, copy=False, **kwargs): - - if fastpath: - return cls._simple_new(start, stop, step, name=name) - - cls._validate_dtype(dtype) - - # RangeIndex - if isinstance(start, RangeIndex): - if not copy: - return start - if name is None: - name = getattr(start, 'name', None) - start, stop, step = start._start, start._stop, start._step - - # validate the arguments - def _ensure_int(value, field): - try: - new_value = int(value) - except: - new_value = value - - if not is_integer(new_value) or new_value != value: - raise TypeError("RangeIndex(...) 
must be called with integers," - " {value} was passed for {field}".format( - value=type(value).__name__, - field=field) - ) - - return new_value - - if start is None: - start = 0 - else: - start = _ensure_int(start, 'start') - if stop is None: - stop = start - start = 0 - else: - stop = _ensure_int(stop, 'stop') - if step is None: - step = 1 - elif step == 0: - raise ValueError("Step must not be zero") - else: - step = _ensure_int(step, 'step') - - return cls._simple_new(start, stop, step, name) - - @classmethod - def from_range(cls, data, name=None, dtype=None, **kwargs): - """ create RangeIndex from a range (py3), or xrange (py2) object """ - if not isinstance(data, range): - raise TypeError( - '{0}(...) must be called with object coercible to a ' - 'range, {1} was passed'.format(cls.__name__, repr(data))) - - if compat.PY3: - step = data.step - stop = data.stop - start = data.start - else: - # seems we only have indexing ops to infer - # rather than direct accessors - if len(data) > 1: - step = data[1] - data[0] - stop = data[-1] + step - start = data[0] - elif len(data): - start = data[0] - stop = data[0] + 1 - step = 1 - else: - start = stop = 0 - step = 1 - return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) - - @classmethod - def _simple_new(cls, start, stop=None, step=None, name=None, - dtype=None, **kwargs): - result = object.__new__(cls) - - # handle passed None, non-integers - if start is None or not is_integer(start): - try: - return RangeIndex(start, stop, step, name=name, **kwargs) - except TypeError: - return Index(start, stop, step, name=name, **kwargs) - - result._start = start - result._stop = stop or 0 - result._step = step or 1 - result.name = name - for k, v in compat.iteritems(kwargs): - setattr(result, k, v) - - result._reset_identity() - return result - - @staticmethod - def _validate_dtype(dtype): - """ require dtype to be None or int64 """ - if not (dtype is None or is_int64_dtype(dtype)): - raise TypeError('Invalid to 
pass a non-int64 dtype to RangeIndex') - - @cache_readonly - def _constructor(self): - """ return the class to use for construction """ - return Int64Index - - @cache_readonly - def _data(self): - return np.arange(self._start, self._stop, self._step, dtype=np.int64) - - @cache_readonly - def _int64index(self): - return Int64Index(self._data, name=self.name, fastpath=True) - - def _get_data_as_items(self): - """ return a list of tuples of start, stop, step """ - return [('start', self._start), - ('stop', self._stop), - ('step', self._step)] - - def __reduce__(self): - d = self._get_attributes_dict() - d.update(dict(self._get_data_as_items())) - return _new_Index, (self.__class__, d), None - - def _format_attrs(self): - """ - Return a list of tuples of the (attr, formatted_value) - """ - attrs = self._get_data_as_items() - if self.name is not None: - attrs.append(('name', default_pprint(self.name))) - return attrs - - def _format_data(self): - # we are formatting thru the attributes - return None - - @cache_readonly - def nbytes(self): - """ return the number of bytes in the underlying data """ - return sum([getsizeof(getattr(self, v)) for v in - ['_start', '_stop', '_step']]) - - def memory_usage(self, deep=False): - """ - Memory usage of my values - - Parameters - ---------- - deep : bool - Introspect the data deeply, interrogate - `object` dtypes for system-level memory consumption - - Returns - ------- - bytes used - - Notes - ----- - Memory usage does not include memory consumed by elements that - are not components of the array if deep=False - - See Also - -------- - numpy.ndarray.nbytes - """ - return self.nbytes - - @property - def dtype(self): - return np.dtype(np.int64) - - @property - def is_unique(self): - """ return if the index has unique values """ - return True - - @property - def has_duplicates(self): - return False - - def tolist(self): - return lrange(self._start, self._stop, self._step) - - def _shallow_copy(self, values=None, **kwargs): - """ 
create a new Index, don't copy the data, use the same object attributes - with passed in attributes taking precedence """ - if values is None: - return RangeIndex(name=self.name, fastpath=True, - **dict(self._get_data_as_items())) - else: - kwargs.setdefault('name', self.name) - return self._int64index._shallow_copy(values, **kwargs) - - @Appender(_index_shared_docs['copy']) - def copy(self, name=None, deep=False, dtype=None, **kwargs): - self._validate_dtype(dtype) - if name is None: - name = self.name - return RangeIndex(name=name, fastpath=True, - **dict(self._get_data_as_items())) - - def argsort(self, *args, **kwargs): - """ - return an ndarray indexer of the underlying data - - See also - -------- - numpy.ndarray.argsort - """ - if self._step > 0: - return np.arange(len(self)) - else: - return np.arange(len(self) - 1, -1, -1) - - def equals(self, other): - """ - Determines if two Index objects contain the same elements. - """ - if isinstance(other, RangeIndex): - ls = len(self) - lo = len(other) - return (ls == lo == 0 or - ls == lo == 1 and - self._start == other._start or - ls == lo and - self._start == other._start and - self._step == other._step) - - return super(RangeIndex, self).equals(other) - - def intersection(self, other): - """ - Form the intersection of two Index objects. 
Sortedness of the result is - not guaranteed - - Parameters - ---------- - other : Index or array-like - - Returns - ------- - intersection : Index - """ - if not isinstance(other, RangeIndex): - return super(RangeIndex, self).intersection(other) - - # check whether intervals intersect - # deals with in- and decreasing ranges - int_low = max(min(self._start, self._stop + 1), - min(other._start, other._stop + 1)) - int_high = min(max(self._stop, self._start + 1), - max(other._stop, other._start + 1)) - if int_high <= int_low: - return RangeIndex() - - # Method hint: linear Diophantine equation - # solve intersection problem - # performance hint: for identical step sizes, could use - # cheaper alternative - gcd, s, t = self._extended_gcd(self._step, other._step) - - # check whether element sets intersect - if (self._start - other._start) % gcd: - return RangeIndex() - - # calculate parameters for the RangeIndex describing the - # intersection disregarding the lower bounds - tmp_start = self._start + (other._start - self._start) * \ - self._step // gcd * s - new_step = self._step * other._step // gcd - new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) - - # adjust index to limiting interval - new_index._start = new_index._min_fitting_element(int_low) - return new_index - - def _min_fitting_element(self, lower_limit): - """Returns the smallest element greater than or equal to the limit""" - no_steps = -(-(lower_limit - self._start) // abs(self._step)) - return self._start + abs(self._step) * no_steps - - def _max_fitting_element(self, upper_limit): - """Returns the largest element smaller than or equal to the limit""" - no_steps = (upper_limit - self._start) // abs(self._step) - return self._start + abs(self._step) * no_steps - - def _extended_gcd(self, a, b): - """ - Extended Euclidean algorithms to solve Bezout's identity: - a*x + b*y = gcd(x, y) - Finds one particular solution for x, y: s, t - Returns: gcd, s, t - """ - s, old_s = 0, 1 - t, old_t 
= 1, 0 - r, old_r = b, a - while r: - quotient = old_r // r - old_r, r = r, old_r - quotient * r - old_s, s = s, old_s - quotient * s - old_t, t = t, old_t - quotient * t - return old_r, old_s, old_t - - def union(self, other): - """ - Form the union of two Index objects and sorts if possible - - Parameters - ---------- - other : Index or array-like - - Returns - ------- - union : Index - """ - self._assert_can_do_setop(other) - if len(other) == 0 or self.equals(other): - return self - if len(self) == 0: - return other - if isinstance(other, RangeIndex): - start_s, step_s = self._start, self._step - end_s = self._start + self._step * (len(self) - 1) - start_o, step_o = other._start, other._step - end_o = other._start + other._step * (len(other) - 1) - if self._step < 0: - start_s, step_s, end_s = end_s, -step_s, start_s - if other._step < 0: - start_o, step_o, end_o = end_o, -step_o, start_o - if len(self) == 1 and len(other) == 1: - step_s = step_o = abs(self._start - other._start) - elif len(self) == 1: - step_s = step_o - elif len(other) == 1: - step_o = step_s - start_r = min(start_s, start_o) - end_r = max(end_s, end_o) - if step_o == step_s: - if ((start_s - start_o) % step_s == 0 and - (start_s - end_o) <= step_s and - (start_o - end_s) <= step_s): - return RangeIndex(start_r, end_r + step_s, step_s) - if ((step_s % 2 == 0) and - (abs(start_s - start_o) <= step_s / 2) and - (abs(end_s - end_o) <= step_s / 2)): - return RangeIndex(start_r, end_r + step_s / 2, step_s / 2) - elif step_o % step_s == 0: - if ((start_o - start_s) % step_s == 0 and - (start_o + step_s >= start_s) and - (end_o - step_s <= end_s)): - return RangeIndex(start_r, end_r + step_s, step_s) - elif step_s % step_o == 0: - if ((start_s - start_o) % step_o == 0 and - (start_s + step_o >= start_o) and - (end_s - step_o <= end_o)): - return RangeIndex(start_r, end_r + step_o, step_o) - - return self._int64index.union(other) - - def join(self, other, how='left', level=None, 
return_indexers=False): - """ - *this is an internal non-public method* - - Compute join_index and indexers to conform data - structures to the new index. - - Parameters - ---------- - other : Index - how : {'left', 'right', 'inner', 'outer'} - level : int or level name, default None - return_indexers : boolean, default False - - Returns - ------- - join_index, (left_indexer, right_indexer) - """ - if how == 'outer' and self is not other: - # note: could return RangeIndex in more circumstances - return self._int64index.join(other, how, level, return_indexers) - - return super(RangeIndex, self).join(other, how, level, return_indexers) - - def __len__(self): - """ - return the length of the RangeIndex - """ - return max(0, -(-(self._stop - self._start) // self._step)) - - @property - def size(self): - return len(self) - - def __getitem__(self, key): - """ - Conserve RangeIndex type for scalar and slice keys. - """ - super_getitem = super(RangeIndex, self).__getitem__ - - if np.isscalar(key): - n = int(key) - if n != key: - return super_getitem(key) - if n < 0: - n = len(self) + key - if n < 0 or n > len(self) - 1: - raise IndexError("index {key} is out of bounds for axis 0 " - "with size {size}".format(key=key, - size=len(self))) - return self._start + n * self._step - - if isinstance(key, slice): - - # This is basically PySlice_GetIndicesEx, but delegation to our - # super routines if we don't have integers - - l = len(self) - - # complete missing slice information - step = 1 if key.step is None else key.step - if key.start is None: - start = l - 1 if step < 0 else 0 - else: - start = key.start - - if start < 0: - start += l - if start < 0: - start = -1 if step < 0 else 0 - if start >= l: - start = l - 1 if step < 0 else l - - if key.stop is None: - stop = -1 if step < 0 else l - else: - stop = key.stop - - if stop < 0: - stop += l - if stop < 0: - stop = -1 - if stop > l: - stop = l - - # delegate non-integer slices - if (start != int(start) and - stop != int(stop) 
and - step != int(step)): - return super_getitem(key) - - # convert indexes to values - start = self._start + self._step * start - stop = self._start + self._step * stop - step = self._step * step - - return RangeIndex(start, stop, step, self.name, fastpath=True) - - # fall back to Int64Index - return super_getitem(key) - - def __floordiv__(self, other): - if com.is_integer(other): - if (len(self) == 0 or - self._start % other == 0 and - self._step % other == 0): - start = self._start // other - step = self._step // other - stop = start + len(self) * step - return RangeIndex(start, stop, step, name=self.name, - fastpath=True) - if len(self) == 1: - start = self._start // other - return RangeIndex(start, start + 1, 1, name=self.name, - fastpath=True) - return self._int64index // other - - @classmethod - def _add_numeric_methods_binary(cls): - """ add in numeric methods, specialized to RangeIndex """ - - def _make_evaluate_binop(op, opstr, reversed=False, step=False): - """ - Parameters - ---------- - op : callable that accepts 2 parms - perform the binary op - opstr : string - string name of ops - reversed : boolean, default False - if this is a reversed op, e.g. 
radd - step : callable, optional, default to False - op to apply to the step parm if not None - if False, use the existing step - """ - - def _evaluate_numeric_binop(self, other): - - other = self._validate_for_numeric_binop(other, op, opstr) - attrs = self._get_attributes_dict() - attrs = self._maybe_update_attributes(attrs) - - if reversed: - self, other = other, self - - try: - # alppy if we have an override - if step: - rstep = step(self._step, other) - - # we don't have a representable op - # so return a base index - if not is_integer(rstep) or not rstep: - raise ValueError - - else: - rstep = self._step - - rstart = op(self._start, other) - rstop = op(self._stop, other) - - result = RangeIndex(rstart, - rstop, - rstep, - **attrs) - - # for compat with numpy / Int64Index - # even if we can represent as a RangeIndex, return - # as a Float64Index if we have float-like descriptors - if not all([is_integer(x) for x in - [rstart, rstop, rstep]]): - result = result.astype('float64') - - return result - - except (ValueError, TypeError, AttributeError): - pass - - # convert to Int64Index ops - if isinstance(self, RangeIndex): - self = self.values - if isinstance(other, RangeIndex): - other = other.values - - return Index(op(self, other), **attrs) - - return _evaluate_numeric_binop - - cls.__add__ = cls.__radd__ = _make_evaluate_binop( - operator.add, '__add__') - cls.__sub__ = _make_evaluate_binop(operator.sub, '__sub__') - cls.__rsub__ = _make_evaluate_binop( - operator.sub, '__sub__', reversed=True) - cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( - operator.mul, - '__mul__', - step=operator.mul) - cls.__truediv__ = _make_evaluate_binop( - operator.truediv, - '__truediv__', - step=operator.truediv) - cls.__rtruediv__ = _make_evaluate_binop( - operator.truediv, - '__truediv__', - reversed=True, - step=operator.truediv) - if not compat.PY3: - cls.__div__ = _make_evaluate_binop( - operator.div, - '__div__', - step=operator.div) - cls.__rdiv__ = 
_make_evaluate_binop( - operator.div, - '__div__', - reversed=True, - step=operator.div) - -RangeIndex._add_numeric_methods() -RangeIndex._add_logical_methods() - - -class Float64Index(NumericIndex): - """ - Immutable ndarray implementing an ordered, sliceable set. The basic object - storing axis labels for all pandas objects. Float64Index is a special case - of `Index` with purely floating point labels. - - Parameters - ---------- - data : array-like (1-dimensional) - dtype : NumPy dtype (default: object) - copy : bool - Make a copy of input ndarray - name : object - Name to be stored in the index - - Notes - ----- - An Float64Index instance can **only** contain hashable objects - """ - - _typ = 'float64index' - _engine_type = _index.Float64Engine - _groupby = _algos.groupby_float64 - _arrmap = _algos.arrmap_float64 - _left_indexer_unique = _algos.left_join_indexer_unique_float64 - _left_indexer = _algos.left_join_indexer_float64 - _inner_indexer = _algos.inner_join_indexer_float64 - _outer_indexer = _algos.outer_join_indexer_float64 - - def __new__(cls, data=None, dtype=None, copy=False, name=None, - fastpath=False, **kwargs): - - if fastpath: - return cls._simple_new(data, name) - - data = cls._coerce_to_ndarray(data) - - if issubclass(data.dtype.type, compat.string_types): - cls._string_data_error(data) - - if dtype is None: - dtype = np.float64 - - try: - subarr = np.array(data, dtype=dtype, copy=copy) - except: - raise TypeError('Unsafe NumPy casting, you must explicitly cast') - - # coerce to float64 for storage - if subarr.dtype != np.float64: - subarr = subarr.astype(np.float64) - - return cls._simple_new(subarr, name) - - @property - def inferred_type(self): - return 'floating' - - def astype(self, dtype): - if np.dtype(dtype) not in (np.object, np.float64): - raise TypeError('Setting %s dtype to anything other than ' - 'float64 or object is not supported' % - self.__class__) - return Index(self._values, name=self.name, dtype=dtype) - - def 
_convert_scalar_indexer(self, key, kind=None): - """ - convert a scalar indexer - - Parameters - ---------- - key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) - - right now we are converting - floats -> ints if the index supports it - """ - - if kind == 'iloc': - if is_integer(key): - return key - return super(Float64Index, self)._convert_scalar_indexer(key, - kind=kind) - - return key - - def _convert_slice_indexer(self, key, kind=None): - """ - convert a slice indexer, by definition these are labels - unless we are iloc - - Parameters - ---------- - key : label of the slice bound - kind : optional, type of the indexing operation (loc/ix/iloc/None) - """ - - # if we are not a slice, then we are done - if not isinstance(key, slice): - return key - - if kind == 'iloc': - return super(Float64Index, self)._convert_slice_indexer(key, - kind=kind) - - # translate to locations - return self.slice_indexer(key.start, key.stop, key.step) - - def _format_native_types(self, na_rep='', float_format=None, decimal='.', - quoting=None, **kwargs): - from pandas.core.format import FloatArrayFormatter - formatter = FloatArrayFormatter(self.values, na_rep=na_rep, - float_format=float_format, - decimal=decimal, quoting=quoting) - return formatter.get_formatted_data() - - def get_value(self, series, key): - """ we always want to get an index value, never a value """ - if not np.isscalar(key): - raise InvalidIndexError - - from pandas.core.indexing import maybe_droplevels - from pandas.core.series import Series - - k = _values_from_object(key) - loc = self.get_loc(k) - new_values = _values_from_object(series)[loc] - - if np.isscalar(new_values) or new_values is None: - return new_values - - new_index = self[loc] - new_index = maybe_droplevels(new_index, k) - return Series(new_values, index=new_index, name=series.name) - - def equals(self, other): - """ - Determines if two Index objects contain the same elements. 
- """ - if self is other: - return True - - # need to compare nans locations and make sure that they are the same - # since nans don't compare equal this is a bit tricky - try: - if not isinstance(other, Float64Index): - other = self._constructor(other) - if (not is_dtype_equal(self.dtype, other.dtype) or - self.shape != other.shape): - return False - left, right = self._values, other._values - return ((left == right) | (self._isnan & other._isnan)).all() - except TypeError: - # e.g. fails in numpy 1.6 with DatetimeIndex #1681 - return False - - def __contains__(self, other): - if super(Float64Index, self).__contains__(other): - return True - - try: - # if other is a sequence this throws a ValueError - return np.isnan(other) and self.hasnans - except ValueError: - try: - return len(other) <= 1 and _try_get_item(other) in self - except TypeError: - return False - except: - return False - - def get_loc(self, key, method=None, tolerance=None): - try: - if np.all(np.isnan(key)): - nan_idxs = self._nan_idxs - try: - return nan_idxs.item() - except (ValueError, IndexError): - # should only need to catch ValueError here but on numpy - # 1.7 .item() can raise IndexError when NaNs are present - return nan_idxs - except (TypeError, NotImplementedError): - pass - return super(Float64Index, self).get_loc(key, method=method, - tolerance=tolerance) - - @property - def is_all_dates(self): - """ - Checks that all the labels are datetime objects - """ - return False - - @cache_readonly - def is_unique(self): - return super(Float64Index, self).is_unique and self._nan_idxs.size < 2 - - @Appender(Index.isin.__doc__) - def isin(self, values, level=None): - value_set = set(values) - if level is not None: - self._validate_index_level(level) - return lib.ismember_nans(np.array(self), value_set, - isnull(list(value_set)).any()) - - -Float64Index._add_numeric_methods() -Float64Index._add_logical_methods_disabled() - - -class MultiIndex(Index): - """ - A multi-level, or hierarchical, index 
object for pandas objects - - Parameters - ---------- - levels : sequence of arrays - The unique labels for each level - labels : sequence of arrays - Integers for each level designating which label at each location - sortorder : optional int - Level of sortedness (must be lexicographically sorted by that - level) - names : optional sequence of objects - Names for each of the index levels. (name is accepted for compat) - copy : boolean, default False - Copy the meta-data - verify_integrity : boolean, default True - Check that the levels/labels are consistent and valid - """ - - # initialize to zero-length tuples to make everything work - _typ = 'multiindex' - _names = FrozenList() - _levels = FrozenList() - _labels = FrozenList() - _comparables = ['names'] - rename = Index.set_names - - def __new__(cls, levels=None, labels=None, sortorder=None, names=None, - copy=False, verify_integrity=True, _set_identity=True, - name=None, **kwargs): - - # compat with Index - if name is not None: - names = name - if levels is None or labels is None: - raise TypeError("Must pass both levels and labels") - if len(levels) != len(labels): - raise ValueError('Length of levels and labels must be the same.') - if len(levels) == 0: - raise ValueError('Must pass non-zero number of levels/labels') - if len(levels) == 1: - if names: - name = names[0] - else: - name = None - return Index(levels[0], name=name, copy=True).take(labels[0]) - - result = object.__new__(MultiIndex) - - # we've already validated levels and labels, so shortcut here - result._set_levels(levels, copy=copy, validate=False) - result._set_labels(labels, copy=copy, validate=False) - - if names is not None: - # handles name validation - result._set_names(names) - - if sortorder is not None: - result.sortorder = int(sortorder) - else: - result.sortorder = sortorder - - if verify_integrity: - result._verify_integrity() - if _set_identity: - result._reset_identity() - - return result - - def _verify_integrity(self): - 
"""Raises ValueError if length of levels and labels don't match or any - label would exceed level bounds""" - # NOTE: Currently does not check, among other things, that cached - # nlevels matches nor that sortorder matches actually sortorder. - labels, levels = self.labels, self.levels - if len(levels) != len(labels): - raise ValueError("Length of levels and labels must match. NOTE:" - " this index is in an inconsistent state.") - label_length = len(self.labels[0]) - for i, (level, label) in enumerate(zip(levels, labels)): - if len(label) != label_length: - raise ValueError("Unequal label lengths: %s" % - ([len(lab) for lab in labels])) - if len(label) and label.max() >= len(level): - raise ValueError("On level %d, label max (%d) >= length of" - " level (%d). NOTE: this index is in an" - " inconsistent state" % (i, label.max(), - len(level))) - - def _get_levels(self): - return self._levels - - def _set_levels(self, levels, level=None, copy=False, validate=True, - verify_integrity=False): - # This is NOT part of the levels property because it should be - # externally not allowed to set levels. 
User beware if you change - # _levels directly - if validate and len(levels) == 0: - raise ValueError('Must set non-zero number of levels.') - if validate and level is None and len(levels) != self.nlevels: - raise ValueError('Length of levels must match number of levels.') - if validate and level is not None and len(levels) != len(level): - raise ValueError('Length of levels must match length of level.') - - if level is None: - new_levels = FrozenList( - _ensure_index(lev, copy=copy)._shallow_copy() - for lev in levels) - else: - level = [self._get_level_number(l) for l in level] - new_levels = list(self._levels) - for l, v in zip(level, levels): - new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy() - new_levels = FrozenList(new_levels) - - names = self.names - self._levels = new_levels - if any(names): - self._set_names(names) - - self._tuples = None - self._reset_cache() - - if verify_integrity: - self._verify_integrity() - - def set_levels(self, levels, level=None, inplace=False, - verify_integrity=True): - """ - Set new levels on MultiIndex. Defaults to returning - new index. 
- - Parameters - ---------- - levels : sequence or list of sequence - new level(s) to apply - level : int, level name, or sequence of int/level names (default None) - level(s) to set (None for all levels) - inplace : bool - if True, mutates in place - verify_integrity : bool (default True) - if True, checks that levels and labels are compatible - - Returns - ------- - new index (of same type and class...etc) - - - Examples - -------- - >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')], - names=['foo', 'bar']) - >>> idx.set_levels([['a','b'], [1,2]]) - MultiIndex(levels=[[u'a', u'b'], [1, 2]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) - >>> idx.set_levels(['a','b'], level=0) - MultiIndex(levels=[[u'a', u'b'], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) - >>> idx.set_levels(['a','b'], level='bar') - MultiIndex(levels=[[1, 2], [u'a', u'b']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) - >>> idx.set_levels([['a','b'], [1,2]], level=[0,1]) - MultiIndex(levels=[[u'a', u'b'], [1, 2]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) - """ - if level is not None and not is_list_like(level): - if not is_list_like(levels): - raise TypeError("Levels must be list-like") - if is_list_like(levels[0]): - raise TypeError("Levels must be list-like") - level = [level] - levels = [levels] - elif level is None or is_list_like(level): - if not is_list_like(levels) or not is_list_like(levels[0]): - raise TypeError("Levels must be list of lists-like") - - if inplace: - idx = self - else: - idx = self._shallow_copy() - idx._reset_identity() - idx._set_levels(levels, level=level, validate=True, - verify_integrity=verify_integrity) - if not inplace: - return idx - - # remove me in 0.14 and change to read only property - __set_levels = deprecate("setting `levels` directly", - partial(set_levels, inplace=True, - verify_integrity=True), - 
alt_name="set_levels") - levels = property(fget=_get_levels, fset=__set_levels) - - def _get_labels(self): - return self._labels - - def _set_labels(self, labels, level=None, copy=False, validate=True, - verify_integrity=False): - - if validate and level is None and len(labels) != self.nlevels: - raise ValueError("Length of labels must match number of levels") - if validate and level is not None and len(labels) != len(level): - raise ValueError('Length of labels must match length of levels.') - - if level is None: - new_labels = FrozenList( - _ensure_frozen(lab, lev, copy=copy)._shallow_copy() - for lev, lab in zip(self.levels, labels)) - else: - level = [self._get_level_number(l) for l in level] - new_labels = list(self._labels) - for l, lev, lab in zip(level, self.levels, labels): - new_labels[l] = _ensure_frozen( - lab, lev, copy=copy)._shallow_copy() - new_labels = FrozenList(new_labels) - - self._labels = new_labels - self._tuples = None - self._reset_cache() - - if verify_integrity: - self._verify_integrity() - - def set_labels(self, labels, level=None, inplace=False, - verify_integrity=True): - """ - Set new labels on MultiIndex. Defaults to returning - new index. 
- - Parameters - ---------- - labels : sequence or list of sequence - new labels to apply - level : int, level name, or sequence of int/level names (default None) - level(s) to set (None for all levels) - inplace : bool - if True, mutates in place - verify_integrity : bool (default True) - if True, checks that levels and labels are compatible - - Returns - ------- - new index (of same type and class...etc) - - Examples - -------- - >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')], - names=['foo', 'bar']) - >>> idx.set_labels([[1,0,1,0], [0,0,1,1]]) - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[1, 0, 1, 0], [0, 0, 1, 1]], - names=[u'foo', u'bar']) - >>> idx.set_labels([1,0,1,0], level=0) - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[1, 0, 1, 0], [0, 1, 0, 1]], - names=[u'foo', u'bar']) - >>> idx.set_labels([0,0,1,1], level='bar') - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 1, 1], [0, 0, 1, 1]], - names=[u'foo', u'bar']) - >>> idx.set_labels([[1,0,1,0], [0,0,1,1]], level=[0,1]) - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[1, 0, 1, 0], [0, 0, 1, 1]], - names=[u'foo', u'bar']) - """ - if level is not None and not is_list_like(level): - if not is_list_like(labels): - raise TypeError("Labels must be list-like") - if is_list_like(labels[0]): - raise TypeError("Labels must be list-like") - level = [level] - labels = [labels] - elif level is None or is_list_like(level): - if not is_list_like(labels) or not is_list_like(labels[0]): - raise TypeError("Labels must be list of lists-like") - - if inplace: - idx = self - else: - idx = self._shallow_copy() - idx._reset_identity() - idx._set_labels(labels, level=level, verify_integrity=verify_integrity) - if not inplace: - return idx - - # remove me in 0.14 and change to readonly property - __set_labels = deprecate("setting labels directly", - partial(set_labels, inplace=True, - verify_integrity=True), - alt_name="set_labels") - 
labels = property(fget=_get_labels, fset=__set_labels) - - def copy(self, names=None, dtype=None, levels=None, labels=None, - deep=False, _set_identity=False): - """ - Make a copy of this object. Names, dtype, levels and labels can be - passed and will be set on new copy. - - Parameters - ---------- - names : sequence, optional - dtype : numpy dtype or pandas type, optional - levels : sequence, optional - labels : sequence, optional - - Returns - ------- - copy : MultiIndex - - Notes - ----- - In most cases, there should be no functional difference from using - ``deep``, but if ``deep`` is passed it will attempt to deepcopy. - This could be potentially expensive on large MultiIndex objects. - """ - if deep: - from copy import deepcopy - levels = levels if levels is not None else deepcopy(self.levels) - labels = labels if labels is not None else deepcopy(self.labels) - names = names if names is not None else deepcopy(self.names) - else: - levels = self.levels - labels = self.labels - names = self.names - return MultiIndex(levels=levels, labels=labels, names=names, - sortorder=self.sortorder, verify_integrity=False, - _set_identity=_set_identity) - - def __array__(self, dtype=None): - """ the array interface, return my values """ - return self.values - - def view(self, cls=None): - """ this is defined as a copy with the same identity """ - result = self.copy() - result._id = self._id - return result - - def _shallow_copy_with_infer(self, values=None, **kwargs): - return self._shallow_copy(values, **kwargs) - - def _shallow_copy(self, values=None, **kwargs): - if values is not None: - if 'name' in kwargs: - kwargs['names'] = kwargs.pop('name', None) - # discards freq - kwargs.pop('freq', None) - return MultiIndex.from_tuples(values, **kwargs) - return self.view() - - @cache_readonly - def dtype(self): - return np.dtype('O') - - @cache_readonly - def nbytes(self): - """ return the number of bytes in the underlying data """ - level_nbytes = sum((i.nbytes for i in 
self.levels)) - label_nbytes = sum((i.nbytes for i in self.labels)) - names_nbytes = sum((getsizeof(i) for i in self.names)) - return level_nbytes + label_nbytes + names_nbytes - - def _format_attrs(self): - """ - Return a list of tuples of the (attr,formatted_value) - """ - attrs = [('levels', default_pprint(self._levels, max_seq_items=False)), - ('labels', default_pprint(self._labels, max_seq_items=False))] - if not all(name is None for name in self.names): - attrs.append(('names', default_pprint(self.names))) - if self.sortorder is not None: - attrs.append(('sortorder', default_pprint(self.sortorder))) - return attrs - - def _format_space(self): - return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - - def _format_data(self): - # we are formatting thru the attributes - return None - - def __len__(self): - return len(self.labels[0]) - - def _get_names(self): - return FrozenList(level.name for level in self.levels) - - def _set_names(self, names, level=None, validate=True): - """ - sets names on levels. WARNING: mutates! - - Note that you generally want to set this *after* changing levels, so - that it only acts on copies - """ - - names = list(names) - - if validate and level is not None and len(names) != len(level): - raise ValueError('Length of names must match length of level.') - if validate and level is None and len(names) != self.nlevels: - raise ValueError('Length of names must match number of levels in ' - 'MultiIndex.') - - if level is None: - level = range(self.nlevels) - else: - level = [self._get_level_number(l) for l in level] - - # set the name - for l, name in zip(level, names): - self.levels[l].rename(name, inplace=True) - - names = property(fset=_set_names, fget=_get_names, - doc="Names of levels in MultiIndex") - - def _reference_duplicate_name(self, name): - """ - Returns True if the name refered to in self.names is duplicated. - """ - # count the times name equals an element in self.names. 
- return sum(name == n for n in self.names) > 1 - - def _format_native_types(self, na_rep='nan', **kwargs): - new_levels = [] - new_labels = [] - - # go through the levels and format them - for level, label in zip(self.levels, self.labels): - level = level._format_native_types(na_rep=na_rep, **kwargs) - # add nan values, if there are any - mask = (label == -1) - if mask.any(): - nan_index = len(level) - level = np.append(level, na_rep) - label = label.values() - label[mask] = nan_index - new_levels.append(level) - new_labels.append(label) - - # reconstruct the multi-index - mi = MultiIndex(levels=new_levels, labels=new_labels, names=self.names, - sortorder=self.sortorder, verify_integrity=False) - - return mi.values - - @property - def _constructor(self): - return MultiIndex.from_tuples - - @cache_readonly - def inferred_type(self): - return 'mixed' - - @staticmethod - def _from_elements(values, labels=None, levels=None, names=None, - sortorder=None): - return MultiIndex(levels, labels, names, sortorder=sortorder) - - def _get_level_number(self, level): - try: - count = self.names.count(level) - if count > 1: - raise ValueError('The name %s occurs multiple times, use a ' - 'level number' % level) - level = self.names.index(level) - except ValueError: - if not isinstance(level, int): - raise KeyError('Level %s not found' % str(level)) - elif level < 0: - level += self.nlevels - if level < 0: - orig_level = level - self.nlevels - raise IndexError('Too many levels: Index has only %d ' - 'levels, %d is not a valid level number' % - (self.nlevels, orig_level)) - # Note: levels are zero-based - elif level >= self.nlevels: - raise IndexError('Too many levels: Index has only %d levels, ' - 'not %d' % (self.nlevels, level + 1)) - return level - - _tuples = None - - @property - def values(self): - if self._tuples is not None: - return self._tuples - - values = [] - for lev, lab in zip(self.levels, self.labels): - # Need to box timestamps, etc. 
- box = hasattr(lev, '_box_values') - # Try to minimize boxing. - if box and len(lev) > len(lab): - taken = lev._box_values(com.take_1d(lev._values, lab)) - elif box: - taken = com.take_1d(lev._box_values(lev._values), lab, - fill_value=_get_na_value(lev.dtype.type)) - else: - taken = com.take_1d(np.asarray(lev._values), lab) - values.append(taken) - - self._tuples = lib.fast_zip(values) - return self._tuples - - # fml - @property - def _is_v1(self): - return False - - @property - def _is_v2(self): - return False - - @property - def _has_complex_internals(self): - # to disable groupby tricks - return True - - @cache_readonly - def is_unique(self): - return not self.duplicated().any() - - @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', - False: 'first'}) - @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) - def duplicated(self, keep='first'): - from pandas.core.groupby import get_group_index - from pandas.hashtable import duplicated_int64 - - shape = map(len, self.levels) - ids = get_group_index(self.labels, shape, sort=False, xnull=False) - - return duplicated_int64(ids, keep) - - @Appender(_index_shared_docs['fillna']) - def fillna(self, value=None, downcast=None): - # isnull is not implemented for MultiIndex - raise NotImplementedError('isnull is not defined for MultiIndex') - - def get_value(self, series, key): - # somewhat broken encapsulation - from pandas.core.indexing import maybe_droplevels - from pandas.core.series import Series - - # Label-based - s = _values_from_object(series) - k = _values_from_object(key) - - def _try_mi(k): - # TODO: what if a level contains tuples?? 
- loc = self.get_loc(k) - new_values = series._values[loc] - new_index = self[loc] - new_index = maybe_droplevels(new_index, k) - return Series(new_values, index=new_index, name=series.name) - - try: - return self._engine.get_value(s, k) - except KeyError as e1: - try: - return _try_mi(key) - except KeyError: - pass - - try: - return _index.get_value_at(s, k) - except IndexError: - raise - except TypeError: - # generator/iterator-like - if is_iterator(key): - raise InvalidIndexError(key) - else: - raise e1 - except Exception: # pragma: no cover - raise e1 - except TypeError: - - # a Timestamp will raise a TypeError in a multi-index - # rather than a KeyError, try it here - # note that a string that 'looks' like a Timestamp will raise - # a KeyError! (GH5725) - if (isinstance(key, (datetime.datetime, np.datetime64)) or - (compat.PY3 and isinstance(key, compat.string_types))): - try: - return _try_mi(key) - except (KeyError): - raise - except: - pass - - try: - return _try_mi(Timestamp(key)) - except: - pass - - raise InvalidIndexError(key) - - def get_level_values(self, level): - """ - Return vector of label values for requested level, equal to the length - of the index - - Parameters - ---------- - level : int or level name - - Returns - ------- - values : ndarray - """ - num = self._get_level_number(level) - unique = self.levels[num] # .values - labels = self.labels[num] - filled = com.take_1d(unique.values, labels, - fill_value=unique._na_value) - _simple_new = unique._simple_new - values = _simple_new(filled, self.names[num], - freq=getattr(unique, 'freq', None), - tz=getattr(unique, 'tz', None)) - return values - - def format(self, space=2, sparsify=None, adjoin=True, names=False, - na_rep=None, formatter=None): - if len(self) == 0: - return [] - - stringified_levels = [] - for lev, lab in zip(self.levels, self.labels): - na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) - - if len(lev) > 0: - - formatted = 
lev.take(lab).format(formatter=formatter) - - # we have some NA - mask = lab == -1 - if mask.any(): - formatted = np.array(formatted, dtype=object) - formatted[mask] = na - formatted = formatted.tolist() - - else: - # weird all NA case - formatted = [com.pprint_thing(na if isnull(x) else x, - escape_chars=('\t', '\r', '\n')) - for x in com.take_1d(lev._values, lab)] - stringified_levels.append(formatted) - - result_levels = [] - for lev, name in zip(stringified_levels, self.names): - level = [] - - if names: - level.append(com.pprint_thing(name, - escape_chars=('\t', '\r', '\n')) - if name is not None else '') - - level.extend(np.array(lev, dtype=object)) - result_levels.append(level) - - if sparsify is None: - sparsify = get_option("display.multi_sparse") - - if sparsify: - sentinel = '' - # GH3547 - # use value of sparsify as sentinel, unless it's an obvious - # "Truthey" value - if sparsify not in [True, 1]: - sentinel = sparsify - # little bit of a kludge job for #1217 - result_levels = _sparsify(result_levels, start=int(names), - sentinel=sentinel) - - if adjoin: - from pandas.core.format import _get_adjustment - adj = _get_adjustment() - return adj.adjoin(space, *result_levels).split('\n') - else: - return result_levels - - def _to_safe_for_reshape(self): - """ convert to object if we are a categorical """ - return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - - def to_hierarchical(self, n_repeat, n_shuffle=1): - """ - Return a MultiIndex reshaped to conform to the - shapes given by n_repeat and n_shuffle. - - Useful to replicate and rearrange a MultiIndex for combination - with another Index with n_repeat items. - - Parameters - ---------- - n_repeat : int - Number of times to repeat the labels on self - n_shuffle : int - Controls the reordering of the labels. If the result is going - to be an inner level in a MultiIndex, n_shuffle will need to be - greater than one. The size of each label must divisible by - n_shuffle. 
- - Returns - ------- - MultiIndex - - Examples - -------- - >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')]) - >>> idx.to_hierarchical(3) - MultiIndex(levels=[[1, 2], [u'one', u'two']], - labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) - """ - levels = self.levels - labels = [np.repeat(x, n_repeat) for x in self.labels] - # Assumes that each label is divisible by n_shuffle - labels = [x.reshape(n_shuffle, -1).ravel(1) for x in labels] - names = self.names - return MultiIndex(levels=levels, labels=labels, names=names) - - @property - def is_all_dates(self): - return False - - def is_lexsorted(self): - """ - Return True if the labels are lexicographically sorted - """ - return self.lexsort_depth == self.nlevels - - def is_lexsorted_for_tuple(self, tup): - """ - Return True if we are correctly lexsorted given the passed tuple - """ - return len(tup) <= self.lexsort_depth - - @cache_readonly - def lexsort_depth(self): - if self.sortorder is not None: - if self.sortorder == 0: - return self.nlevels - else: - return 0 - - int64_labels = [com._ensure_int64(lab) for lab in self.labels] - for k in range(self.nlevels, 0, -1): - if lib.is_lexsorted(int64_labels[:k]): - return k - - return 0 - - @classmethod - def from_arrays(cls, arrays, sortorder=None, names=None): - """ - Convert arrays to MultiIndex - - Parameters - ---------- - arrays : list / sequence of array-likes - Each array-like gives one level's value for each data point. - len(arrays) is the number of levels. 
- sortorder : int or None - Level of sortedness (must be lexicographically sorted by that - level) - - Returns - ------- - index : MultiIndex - - Examples - -------- - >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] - >>> MultiIndex.from_arrays(arrays, names=('number', 'color')) - - See Also - -------- - MultiIndex.from_tuples : Convert list of tuples to MultiIndex - MultiIndex.from_product : Make a MultiIndex from cartesian product - of iterables - """ - from pandas.core.categorical import Categorical - - if len(arrays) == 1: - name = None if names is None else names[0] - return Index(arrays[0], name=name) - - cats = [Categorical.from_array(arr, ordered=True) for arr in arrays] - levels = [c.categories for c in cats] - labels = [c.codes for c in cats] - if names is None: - names = [getattr(arr, "name", None) for arr in arrays] - - return MultiIndex(levels=levels, labels=labels, sortorder=sortorder, - names=names, verify_integrity=False) - - @classmethod - def from_tuples(cls, tuples, sortorder=None, names=None): - """ - Convert list of tuples to MultiIndex - - Parameters - ---------- - tuples : list / sequence of tuple-likes - Each tuple is the index of one row/column. - sortorder : int or None - Level of sortedness (must be lexicographically sorted by that - level) - - Returns - ------- - index : MultiIndex - - Examples - -------- - >>> tuples = [(1, u'red'), (1, u'blue'), - (2, u'red'), (2, u'blue')] - >>> MultiIndex.from_tuples(tuples, names=('number', 'color')) - - See Also - -------- - MultiIndex.from_arrays : Convert list of arrays to MultiIndex - MultiIndex.from_product : Make a MultiIndex from cartesian product - of iterables - """ - if len(tuples) == 0: - # I think this is right? Not quite sure... 
- raise TypeError('Cannot infer number of levels from empty list') - - if isinstance(tuples, (np.ndarray, Index)): - if isinstance(tuples, Index): - tuples = tuples._values - - arrays = list(lib.tuples_to_object_array(tuples).T) - elif isinstance(tuples, list): - arrays = list(lib.to_object_array_tuples(tuples).T) - else: - arrays = lzip(*tuples) - - return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) - - @classmethod - def from_product(cls, iterables, sortorder=None, names=None): - """ - Make a MultiIndex from the cartesian product of multiple iterables - - Parameters - ---------- - iterables : list / sequence of iterables - Each iterable has unique labels for each level of the index. - sortorder : int or None - Level of sortedness (must be lexicographically sorted by that - level). - names : list / sequence of strings or None - Names for the levels in the index. - - Returns - ------- - index : MultiIndex - - Examples - -------- - >>> numbers = [0, 1, 2] - >>> colors = [u'green', u'purple'] - >>> MultiIndex.from_product([numbers, colors], - names=['number', 'color']) - MultiIndex(levels=[[0, 1, 2], [u'green', u'purple']], - labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], - names=[u'number', u'color']) - - See Also - -------- - MultiIndex.from_arrays : Convert list of arrays to MultiIndex - MultiIndex.from_tuples : Convert list of tuples to MultiIndex - """ - from pandas.core.categorical import Categorical - from pandas.tools.util import cartesian_product - - categoricals = [Categorical.from_array(it, ordered=True) - for it in iterables] - labels = cartesian_product([c.codes for c in categoricals]) - - return MultiIndex(levels=[c.categories for c in categoricals], - labels=labels, sortorder=sortorder, names=names) - - @property - def nlevels(self): - return len(self.levels) - - @property - def levshape(self): - return tuple(len(x) for x in self.levels) - - def __contains__(self, key): - hash(key) - # work around some kind of odd cython bug - 
try: - self.get_loc(key) - return True - except LookupError: - return False - - def __reduce__(self): - """Necessary for making this object picklable""" - d = dict(levels=[lev for lev in self.levels], - labels=[label for label in self.labels], - sortorder=self.sortorder, names=list(self.names)) - return _new_Index, (self.__class__, d), None - - def __setstate__(self, state): - """Necessary for making this object picklable""" - - if isinstance(state, dict): - levels = state.get('levels') - labels = state.get('labels') - sortorder = state.get('sortorder') - names = state.get('names') - - elif isinstance(state, tuple): - - nd_state, own_state = state - levels, labels, sortorder, names = own_state - - self._set_levels([Index(x) for x in levels], validate=False) - self._set_labels(labels) - self._set_names(names) - self.sortorder = sortorder - self._verify_integrity() - self._reset_identity() - - def __getitem__(self, key): - if np.isscalar(key): - retval = [] - for lev, lab in zip(self.levels, self.labels): - if lab[key] == -1: - retval.append(np.nan) - else: - retval.append(lev[lab[key]]) - - return tuple(retval) - else: - if is_bool_indexer(key): - key = np.asarray(key) - sortorder = self.sortorder - else: - # cannot be sure whether the result will be sorted - sortorder = None - - new_labels = [lab[key] for lab in self.labels] - - return MultiIndex(levels=self.levels, labels=new_labels, - names=self.names, sortorder=sortorder, - verify_integrity=False) - - def take(self, indexer, axis=None): - indexer = com._ensure_platform_int(indexer) - new_labels = [lab.take(indexer) for lab in self.labels] - return MultiIndex(levels=self.levels, labels=new_labels, - names=self.names, verify_integrity=False) - - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : Index - """ - if not isinstance(other, (list, tuple)): - other = [other] - - if 
all((isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) - for o in other): - arrays = [] - for i in range(self.nlevels): - label = self.get_level_values(i) - appended = [o.get_level_values(i) for o in other] - arrays.append(label.append(appended)) - return MultiIndex.from_arrays(arrays, names=self.names) - - to_concat = (self.values, ) + tuple(k._values for k in other) - new_tuples = np.concatenate(to_concat) - - # if all(isinstance(x, MultiIndex) for x in other): - try: - return MultiIndex.from_tuples(new_tuples, names=self.names) - except: - return Index(new_tuples) - - def argsort(self, *args, **kwargs): - return self.values.argsort(*args, **kwargs) - - def repeat(self, n): - return MultiIndex(levels=self.levels, - labels=[label.view(np.ndarray).repeat(n) - for label in self.labels], names=self.names, - sortorder=self.sortorder, verify_integrity=False) - - def drop(self, labels, level=None, errors='raise'): - """ - Make new MultiIndex with passed list of labels deleted - - Parameters - ---------- - labels : array-like - Must be a list of tuples - level : int or level name, default None - - Returns - ------- - dropped : MultiIndex - """ - if level is not None: - return self._drop_from_level(labels, level) - - try: - if not isinstance(labels, (np.ndarray, Index)): - labels = com._index_labels_to_array(labels) - indexer = self.get_indexer(labels) - mask = indexer == -1 - if mask.any(): - if errors != 'ignore': - raise ValueError('labels %s not contained in axis' % - labels[mask]) - indexer = indexer[~mask] - except Exception: - pass - - inds = [] - for label in labels: - try: - loc = self.get_loc(label) - if isinstance(loc, int): - inds.append(loc) - else: - inds.extend(lrange(loc.start, loc.stop)) - except KeyError: - if errors != 'ignore': - raise - - return self.delete(inds) - - def _drop_from_level(self, labels, level): - labels = com._index_labels_to_array(labels) - i = self._get_level_number(level) - index = self.levels[i] - values = 
index.get_indexer(labels) - - mask = ~lib.ismember(self.labels[i], set(values)) - - return self[mask] - - def droplevel(self, level=0): - """ - Return Index with requested level removed. If MultiIndex has only 2 - levels, the result will be of Index type not MultiIndex. - - Parameters - ---------- - level : int/level name or list thereof - - Notes - ----- - Does not check if result index is unique or not - - Returns - ------- - index : Index or MultiIndex - """ - levels = level - if not isinstance(levels, (tuple, list)): - levels = [level] - - new_levels = list(self.levels) - new_labels = list(self.labels) - new_names = list(self.names) - - levnums = sorted(self._get_level_number(lev) for lev in levels)[::-1] - - for i in levnums: - new_levels.pop(i) - new_labels.pop(i) - new_names.pop(i) - - if len(new_levels) == 1: - - # set nan if needed - mask = new_labels[0] == -1 - result = new_levels[0].take(new_labels[0]) - if mask.any(): - result = result.putmask(mask, np.nan) - - result.name = new_names[0] - return result - else: - return MultiIndex(levels=new_levels, labels=new_labels, - names=new_names, verify_integrity=False) - - def swaplevel(self, i, j): - """ - Swap level i with level j. Do not change the ordering of anything - - Parameters - ---------- - i, j : int, string (can be mixed) - Level of index to be swapped. Can pass level name as string. - - Returns - ------- - swapped : MultiIndex - """ - new_levels = list(self.levels) - new_labels = list(self.labels) - new_names = list(self.names) - - i = self._get_level_number(i) - j = self._get_level_number(j) - - new_levels[i], new_levels[j] = new_levels[j], new_levels[i] - new_labels[i], new_labels[j] = new_labels[j], new_labels[i] - new_names[i], new_names[j] = new_names[j], new_names[i] - - return MultiIndex(levels=new_levels, labels=new_labels, - names=new_names, verify_integrity=False) - - def reorder_levels(self, order): - """ - Rearrange levels using input order. 
May not drop or duplicate levels - - Parameters - ---------- - """ - order = [self._get_level_number(i) for i in order] - if len(order) != self.nlevels: - raise AssertionError('Length of order must be same as ' - 'number of levels (%d), got %d' % - (self.nlevels, len(order))) - new_levels = [self.levels[i] for i in order] - new_labels = [self.labels[i] for i in order] - new_names = [self.names[i] for i in order] - - return MultiIndex(levels=new_levels, labels=new_labels, - names=new_names, verify_integrity=False) - - def __getslice__(self, i, j): - return self.__getitem__(slice(i, j)) - - def sortlevel(self, level=0, ascending=True, sort_remaining=True): - """ - Sort MultiIndex at the requested level. The result will respect the - original ordering of the associated factor at that level. - - Parameters - ---------- - level : list-like, int or str, default 0 - If a string is given, must be a name of the level - If list-like must be names or ints of levels. - ascending : boolean, default True - False to sort in descending order - Can also be a list to specify a directed ordering - sort_remaining : sort by the remaining levels after level. 
- - Returns - ------- - sorted_index : MultiIndex - """ - from pandas.core.groupby import _indexer_from_factorized - - if isinstance(level, (compat.string_types, int)): - level = [level] - level = [self._get_level_number(lev) for lev in level] - sortorder = None - - # we have a directed ordering via ascending - if isinstance(ascending, list): - if not len(level) == len(ascending): - raise ValueError("level must have same length as ascending") - - from pandas.core.groupby import _lexsort_indexer - indexer = _lexsort_indexer(self.labels, orders=ascending) - - # level ordering - else: - - labels = list(self.labels) - shape = list(self.levshape) - - # partition labels and shape - primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level)) - primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level)) - - if sort_remaining: - primary += primary + tuple(labels) - primshp += primshp + tuple(shape) - else: - sortorder = level[0] - - indexer = _indexer_from_factorized(primary, primshp, - compress=False) - - if not ascending: - indexer = indexer[::-1] - - indexer = com._ensure_platform_int(indexer) - new_labels = [lab.take(indexer) for lab in self.labels] - - new_index = MultiIndex(labels=new_labels, levels=self.levels, - names=self.names, sortorder=sortorder, - verify_integrity=False) - - return new_index, indexer - - def get_indexer(self, target, method=None, limit=None, tolerance=None): - """ - Compute indexer and mask for new index given the current index. The - indexer should be then used as an input to ndarray.take to align the - current data to the new index. 
The mask determines whether labels are - found or not in the current index - - Parameters - ---------- - target : MultiIndex or Index (of tuples) - method : {'pad', 'ffill', 'backfill', 'bfill'} - pad / ffill: propagate LAST valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap - - Notes - ----- - This is a low-level method and probably should be used at your own risk - - Examples - -------- - >>> indexer, mask = index.get_indexer(new_index) - >>> new_values = cur_values.take(indexer) - >>> new_values[-mask] = np.nan - - Returns - ------- - (indexer, mask) : (ndarray, ndarray) - """ - method = _clean_reindex_fill_method(method) - - target = _ensure_index(target) - - target_index = target - if isinstance(target, MultiIndex): - target_index = target._tuple_index - - if not is_object_dtype(target_index.dtype): - return np.ones(len(target_index)) * -1 - - if not self.is_unique: - raise Exception('Reindexing only valid with uniquely valued Index ' - 'objects') - - self_index = self._tuple_index - - if method == 'pad' or method == 'backfill': - if tolerance is not None: - raise NotImplementedError("tolerance not implemented yet " - 'for MultiIndex') - indexer = self_index._get_fill_indexer(target, method, limit) - elif method == 'nearest': - raise NotImplementedError("method='nearest' not implemented yet " - 'for MultiIndex; see GitHub issue 9365') - else: - indexer = self_index._engine.get_indexer(target._values) - - return com._ensure_platform_int(indexer) - - def reindex(self, target, method=None, level=None, limit=None, - tolerance=None): - """ - Create index with target's values (move/add/delete values as necessary) - - Returns - ------- - new_index : pd.MultiIndex - Resulting index - indexer : np.ndarray or None - Indices of output values in original index - - """ - # GH6552: preserve names when reindexing to non-named target - # (i.e. neither Index nor Series). 
- preserve_names = not hasattr(target, 'names') - - if level is not None: - if method is not None: - raise TypeError('Fill method not supported if level passed') - - # GH7774: preserve dtype/tz if target is empty and not an Index. - target = _ensure_has_len(target) # target may be an iterator - if len(target) == 0 and not isinstance(target, Index): - idx = self.levels[level] - attrs = idx._get_attributes_dict() - attrs.pop('freq', None) # don't preserve freq - target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype), - **attrs) - else: - target = _ensure_index(target) - target, indexer, _ = self._join_level(target, level, how='right', - return_indexers=True, - keep_order=False) - else: - if self.equals(target): - indexer = None - else: - if self.is_unique: - indexer = self.get_indexer(target, method=method, - limit=limit, - tolerance=tolerance) - else: - raise Exception("cannot handle a non-unique multi-index!") - - if not isinstance(target, MultiIndex): - if indexer is None: - target = self - elif (indexer >= 0).all(): - target = self.take(indexer) - else: - # hopefully? - target = MultiIndex.from_tuples(target) - - if (preserve_names and target.nlevels == self.nlevels and - target.names != self.names): - target = target.copy(deep=False) - target.names = self.names - - return target, indexer - - @cache_readonly - def _tuple_index(self): - """ - Convert MultiIndex to an Index of tuples - - Returns - ------- - index : Index - """ - return Index(self._values) - - def get_slice_bound(self, label, side, kind): - if not isinstance(label, tuple): - label = label, - return self._partial_tup_index(label, side=side) - - def slice_locs(self, start=None, end=None, step=None, kind=None): - """ - For an ordered MultiIndex, compute the slice locations for input - labels. They can be tuples representing partial levels, e.g. for a - MultiIndex with 3 levels, you can pass a single value (corresponding to - the first level), or a 1-, 2-, or 3-tuple. 
- - Parameters - ---------- - start : label or tuple, default None - If None, defaults to the beginning - end : label or tuple - If None, defaults to the end - step : int or None - Slice step - kind : string, optional, defaults None - - Returns - ------- - (start, end) : (int, int) - - Notes - ----- - This function assumes that the data is sorted by the first level - """ - # This function adds nothing to its parent implementation (the magic - # happens in get_slice_bound method), but it adds meaningful doc. - return super(MultiIndex, self).slice_locs(start, end, step, kind=kind) - - def _partial_tup_index(self, tup, side='left'): - if len(tup) > self.lexsort_depth: - raise KeyError('Key length (%d) was greater than MultiIndex' - ' lexsort depth (%d)' % - (len(tup), self.lexsort_depth)) - - n = len(tup) - start, end = 0, len(self) - zipped = zip(tup, self.levels, self.labels) - for k, (lab, lev, labs) in enumerate(zipped): - section = labs[start:end] - - if lab not in lev: - if not lev.is_type_compatible(lib.infer_dtype([lab])): - raise TypeError('Level type mismatch: %s' % lab) - - # short circuit - loc = lev.searchsorted(lab, side=side) - if side == 'right' and loc >= 0: - loc -= 1 - return start + section.searchsorted(loc, side=side) - - idx = lev.get_loc(lab) - if k < n - 1: - end = start + section.searchsorted(idx, side='right') - start = start + section.searchsorted(idx, side='left') - else: - return start + section.searchsorted(idx, side=side) - - def get_loc(self, key, method=None): - """ - Get integer location, slice or boolean mask for requested label or - tuple. If the key is past the lexsort depth, the return may be a - boolean mask array, otherwise it is always a slice or int. 
- - Parameters - ---------- - key : label or tuple - method : None - - Returns - ------- - loc : int, slice object or boolean mask - """ - if method is not None: - raise NotImplementedError('only the default get_loc method is ' - 'currently supported for MultiIndex') - - def _maybe_to_slice(loc): - '''convert integer indexer to boolean mask or slice if possible''' - if not isinstance(loc, np.ndarray) or loc.dtype != 'int64': - return loc - - loc = lib.maybe_indices_to_slice(loc, len(self)) - if isinstance(loc, slice): - return loc - - mask = np.empty(len(self), dtype='bool') - mask.fill(False) - mask[loc] = True - return mask - - if not isinstance(key, tuple): - loc = self._get_level_indexer(key, level=0) - return _maybe_to_slice(loc) - - keylen = len(key) - if self.nlevels < keylen: - raise KeyError('Key length ({0}) exceeds index depth ({1})' - ''.format(keylen, self.nlevels)) - - if keylen == self.nlevels and self.is_unique: - - def _maybe_str_to_time_stamp(key, lev): - if lev.is_all_dates and not isinstance(key, Timestamp): - try: - return Timestamp(key, tz=getattr(lev, 'tz', None)) - except Exception: - pass - return key - - key = _values_from_object(key) - key = tuple(map(_maybe_str_to_time_stamp, key, self.levels)) - return self._engine.get_loc(key) - - # -- partial selection or non-unique index - # break the key into 2 parts based on the lexsort_depth of the index; - # the first part returns a continuous slice of the index; the 2nd part - # needs linear search within the slice - i = self.lexsort_depth - lead_key, follow_key = key[:i], key[i:] - start, stop = (self.slice_locs(lead_key, lead_key) - if lead_key else (0, len(self))) - - if start == stop: - raise KeyError(key) - - if not follow_key: - return slice(start, stop) - - warnings.warn('indexing past lexsort depth may impact performance.', - PerformanceWarning, stacklevel=10) - - loc = np.arange(start, stop, dtype='int64') - - for i, k in enumerate(follow_key, len(lead_key)): - mask = 
self.labels[i][loc] == self.levels[i].get_loc(k) - if not mask.all(): - loc = loc[mask] - if not len(loc): - raise KeyError(key) - - return (_maybe_to_slice(loc) if len(loc) != stop - start else - slice(start, stop)) - - def get_loc_level(self, key, level=0, drop_level=True): - """ - Get integer location slice for requested label or tuple - - Parameters - ---------- - key : label or tuple - level : int/level name or list thereof - - Returns - ------- - loc : int or slice object - """ - - def maybe_droplevels(indexer, levels, drop_level): - if not drop_level: - return self[indexer] - # kludgearound - orig_index = new_index = self[indexer] - levels = [self._get_level_number(i) for i in levels] - for i in sorted(levels, reverse=True): - try: - new_index = new_index.droplevel(i) - except: - - # no dropping here - return orig_index - return new_index - - if isinstance(level, (tuple, list)): - if len(key) != len(level): - raise AssertionError('Key for location must have same ' - 'length as number of levels') - result = None - for lev, k in zip(level, key): - loc, new_index = self.get_loc_level(k, level=lev) - if isinstance(loc, slice): - mask = np.zeros(len(self), dtype=bool) - mask[loc] = True - loc = mask - - result = loc if result is None else result & loc - - return result, maybe_droplevels(result, level, drop_level) - - level = self._get_level_number(level) - - # kludge for #1796 - if isinstance(key, list): - key = tuple(key) - - if isinstance(key, tuple) and level == 0: - - try: - if key in self.levels[0]: - indexer = self._get_level_indexer(key, level=level) - new_index = maybe_droplevels(indexer, [0], drop_level) - return indexer, new_index - except TypeError: - pass - - if not any(isinstance(k, slice) for k in key): - - # partial selection - # optionally get indexer to avoid re-calculation - def partial_selection(key, indexer=None): - if indexer is None: - indexer = self.get_loc(key) - ilevels = [i for i in range(len(key)) - if key[i] != slice(None, None)] - 
return indexer, maybe_droplevels(indexer, ilevels, - drop_level) - - if len(key) == self.nlevels: - - if self.is_unique: - - # here we have a completely specified key, but are - # using some partial string matching here - # GH4758 - all_dates = [(l.is_all_dates and - not isinstance(k, compat.string_types)) - for k, l in zip(key, self.levels)] - can_index_exactly = any(all_dates) - if (any([l.is_all_dates - for k, l in zip(key, self.levels)]) and - not can_index_exactly): - indexer = self.get_loc(key) - - # we have a multiple selection here - if (not isinstance(indexer, slice) or - indexer.stop - indexer.start != 1): - return partial_selection(key, indexer) - - key = tuple(self[indexer].tolist()[0]) - - return (self._engine.get_loc(_values_from_object(key)), - None) - else: - return partial_selection(key) - else: - return partial_selection(key) - else: - indexer = None - for i, k in enumerate(key): - if not isinstance(k, slice): - k = self._get_level_indexer(k, level=i) - if isinstance(k, slice): - # everything - if k.start == 0 and k.stop == len(self): - k = slice(None, None) - else: - k_index = k - - if isinstance(k, slice): - if k == slice(None, None): - continue - else: - raise TypeError(key) - - if indexer is None: - indexer = k_index - else: # pragma: no cover - indexer &= k_index - if indexer is None: - indexer = slice(None, None) - ilevels = [i for i in range(len(key)) - if key[i] != slice(None, None)] - return indexer, maybe_droplevels(indexer, ilevels, drop_level) - else: - indexer = self._get_level_indexer(key, level=level) - return indexer, maybe_droplevels(indexer, [level], drop_level) - - def _get_level_indexer(self, key, level=0, indexer=None): - # return an indexer, boolean array or a slice showing where the key is - # in the totality of values - # if the indexer is provided, then use this - - level_index = self.levels[level] - labels = self.labels[level] - - def convert_indexer(start, stop, step, indexer=indexer, labels=labels): - # given the inputs 
and the labels/indexer, compute an indexer set - # if we have a provided indexer, then this need not consider - # the entire labels set - - r = np.arange(start, stop, step) - if indexer is not None and len(indexer) != len(labels): - - # we have an indexer which maps the locations in the labels - # that we have already selected (and is not an indexer for the - # entire set) otherwise this is wasteful so we only need to - # examine locations that are in this set the only magic here is - # that the result are the mappings to the set that we have - # selected - from pandas import Series - mapper = Series(indexer) - indexer = labels.take(com._ensure_platform_int(indexer)) - result = Series(Index(indexer).isin(r).nonzero()[0]) - m = result.map(mapper)._values - - else: - m = np.zeros(len(labels), dtype=bool) - m[np.in1d(labels, r, assume_unique=True)] = True - - return m - - if isinstance(key, slice): - # handle a slice, returnig a slice if we can - # otherwise a boolean indexer - - try: - if key.start is not None: - start = level_index.get_loc(key.start) - else: - start = 0 - if key.stop is not None: - stop = level_index.get_loc(key.stop) - else: - stop = len(level_index) - 1 - step = key.step - except KeyError: - - # we have a partial slice (like looking up a partial date - # string) - start = stop = level_index.slice_indexer(key.start, key.stop, - key.step) - step = start.step - - if isinstance(start, slice) or isinstance(stop, slice): - # we have a slice for start and/or stop - # a partial date slicer on a DatetimeIndex generates a slice - # note that the stop ALREADY includes the stopped point (if - # it was a string sliced) - return convert_indexer(start.start, stop.stop, step) - - elif level > 0 or self.lexsort_depth == 0 or step is not None: - # need to have like semantics here to right - # searching as when we are using a slice - # so include the stop+1 (so we include stop) - return convert_indexer(start, stop + 1, step) - else: - # sorted, so can return slice 
object -> view - i = labels.searchsorted(start, side='left') - j = labels.searchsorted(stop, side='right') - return slice(i, j, step) - - else: - - loc = level_index.get_loc(key) - if level > 0 or self.lexsort_depth == 0: - return np.array(labels == loc, dtype=bool) - else: - # sorted, so can return slice object -> view - i = labels.searchsorted(loc, side='left') - j = labels.searchsorted(loc, side='right') - return slice(i, j) - - def get_locs(self, tup): - """ - Given a tuple of slices/lists/labels/boolean indexer to a level-wise - spec produce an indexer to extract those locations - - Parameters - ---------- - key : tuple of (slices/list/labels) - - Returns - ------- - locs : integer list of locations or boolean indexer suitable - for passing to iloc - """ - - # must be lexsorted to at least as many levels - if not self.is_lexsorted_for_tuple(tup): - raise KeyError('MultiIndex Slicing requires the index to be fully ' - 'lexsorted tuple len ({0}), lexsort depth ' - '({1})'.format(len(tup), self.lexsort_depth)) - - # indexer - # this is the list of all values that we want to select - n = len(self) - indexer = None - - def _convert_to_indexer(r): - # return an indexer - if isinstance(r, slice): - m = np.zeros(n, dtype=bool) - m[r] = True - r = m.nonzero()[0] - elif is_bool_indexer(r): - if len(r) != n: - raise ValueError("cannot index with a boolean indexer " - "that is not the same length as the " - "index") - r = r.nonzero()[0] - return Int64Index(r) - - def _update_indexer(idxr, indexer=indexer): - if indexer is None: - indexer = Index(np.arange(n)) - if idxr is None: - return indexer - return indexer & idxr - - for i, k in enumerate(tup): - - if is_bool_indexer(k): - # a boolean indexer, must be the same length! 
- k = np.asarray(k) - indexer = _update_indexer(_convert_to_indexer(k), - indexer=indexer) - - elif is_list_like(k): - # a collection of labels to include from this level (these - # are or'd) - indexers = None - for x in k: - try: - idxrs = _convert_to_indexer( - self._get_level_indexer(x, level=i, - indexer=indexer)) - indexers = (idxrs if indexers is None - else indexers | idxrs) - except KeyError: - - # ignore not founds - continue - - if indexers is not None: - indexer = _update_indexer(indexers, indexer=indexer) - else: - - # no matches we are done - return Int64Index([])._values - - elif is_null_slice(k): - # empty slice - indexer = _update_indexer(None, indexer=indexer) - - elif isinstance(k, slice): - - # a slice, include BOTH of the labels - indexer = _update_indexer(_convert_to_indexer( - self._get_level_indexer(k, level=i, indexer=indexer)), - indexer=indexer) - else: - # a single label - indexer = _update_indexer(_convert_to_indexer( - self.get_loc_level(k, level=i, drop_level=False)[0]), - indexer=indexer) - - # empty indexer - if indexer is None: - return Int64Index([])._values - return indexer._values - - def truncate(self, before=None, after=None): - """ - Slice index between two labels / tuples, return new MultiIndex - - Parameters - ---------- - before : label or tuple, can be partial. Default None - None defaults to start - after : label or tuple, can be partial. 
Default None - None defaults to end - - Returns - ------- - truncated : MultiIndex - """ - if after and before and after < before: - raise ValueError('after < before') - - i, j = self.levels[0].slice_locs(before, after) - left, right = self.slice_locs(before, after) - - new_levels = list(self.levels) - new_levels[0] = new_levels[0][i:j] - - new_labels = [lab[left:right] for lab in self.labels] - new_labels[0] = new_labels[0] - i - - return MultiIndex(levels=new_levels, labels=new_labels, - verify_integrity=False) - - def equals(self, other): - """ - Determines if two MultiIndex objects have the same labeling information - (the levels themselves do not necessarily have to be the same) - - See also - -------- - equal_levels - """ - if self.is_(other): - return True - - if not isinstance(other, MultiIndex): - return array_equivalent(self._values, - _values_from_object(_ensure_index(other))) - - if self.nlevels != other.nlevels: - return False - - if len(self) != len(other): - return False - - for i in range(self.nlevels): - svalues = com.take_nd(np.asarray(self.levels[i]._values), - self.labels[i], allow_fill=False) - ovalues = com.take_nd(np.asarray(other.levels[i]._values), - other.labels[i], allow_fill=False) - if not array_equivalent(svalues, ovalues): - return False - - return True - - def equal_levels(self, other): - """ - Return True if the levels of both MultiIndex objects are the same - - """ - if self.nlevels != other.nlevels: - return False - - for i in range(self.nlevels): - if not self.levels[i].equals(other.levels[i]): - return False - return True - - def union(self, other): - """ - Form the union of two MultiIndex objects, sorting if possible - - Parameters - ---------- - other : MultiIndex or array / Index of tuples - - Returns - ------- - Index - - >>> index.union(index2) - """ - self._assert_can_do_setop(other) - other, result_names = self._convert_can_do_setop(other) - - if len(other) == 0 or self.equals(other): - return self - - uniq_tuples = 
lib.fast_unique_multiple([self._values, other._values]) - return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, - names=result_names) - - def intersection(self, other): - """ - Form the intersection of two MultiIndex objects, sorting if possible - - Parameters - ---------- - other : MultiIndex or array / Index of tuples - - Returns - ------- - Index - """ - self._assert_can_do_setop(other) - other, result_names = self._convert_can_do_setop(other) - - if self.equals(other): - return self - - self_tuples = self._values - other_tuples = other._values - uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) - if len(uniq_tuples) == 0: - return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - names=result_names, verify_integrity=False) - else: - return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, - names=result_names) - - def difference(self, other): - """ - Compute sorted set difference of two MultiIndex objects - - Returns - ------- - diff : MultiIndex - """ - self._assert_can_do_setop(other) - other, result_names = self._convert_can_do_setop(other) - - if len(other) == 0: - return self - - if self.equals(other): - return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - names=result_names, verify_integrity=False) - - difference = sorted(set(self._values) - set(other._values)) - - if len(difference) == 0: - return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - names=result_names, verify_integrity=False) - else: - return MultiIndex.from_tuples(difference, sortorder=0, - names=result_names) - - def astype(self, dtype): - if not is_object_dtype(np.dtype(dtype)): - raise TypeError('Setting %s dtype to anything other than object ' - 'is not supported' % self.__class__) - return self._shallow_copy() - - def _convert_can_do_setop(self, other): - result_names = self.names - - if not hasattr(other, 'names'): - if len(other) == 0: - other = MultiIndex(levels=[[]] * self.nlevels, - 
labels=[[]] * self.nlevels, - verify_integrity=False) - else: - msg = 'other must be a MultiIndex or a list of tuples' - try: - other = MultiIndex.from_tuples(other) - except: - raise TypeError(msg) - else: - result_names = self.names if self.names == other.names else None - return other, result_names - - def insert(self, loc, item): - """ - Make new MultiIndex inserting new item at location - - Parameters - ---------- - loc : int - item : tuple - Must be same length as number of levels in the MultiIndex - - Returns - ------- - new_index : Index - """ - # Pad the key with empty strings if lower levels of the key - # aren't specified: - if not isinstance(item, tuple): - item = (item, ) + ('', ) * (self.nlevels - 1) - elif len(item) != self.nlevels: - raise ValueError('Item must have length equal to number of ' - 'levels.') - - new_levels = [] - new_labels = [] - for k, level, labels in zip(item, self.levels, self.labels): - if k not in level: - # have to insert into level - # must insert at end otherwise you have to recompute all the - # other labels - lev_loc = len(level) - level = level.insert(lev_loc, k) - else: - lev_loc = level.get_loc(k) - - new_levels.append(level) - new_labels.append(np.insert(_ensure_int64(labels), loc, lev_loc)) - - return MultiIndex(levels=new_levels, labels=new_labels, - names=self.names, verify_integrity=False) - - def delete(self, loc): - """ - Make new index with passed location deleted - - Returns - ------- - new_index : MultiIndex - """ - new_labels = [np.delete(lab, loc) for lab in self.labels] - return MultiIndex(levels=self.levels, labels=new_labels, - names=self.names, verify_integrity=False) - - get_major_bounds = slice_locs - - __bounds = None - - @property - def _bounds(self): - """ - Return or compute and return slice points for level 0, assuming - sortedness - """ - if self.__bounds is None: - inds = np.arange(len(self.levels[0])) - self.__bounds = self.labels[0].searchsorted(inds) - - return self.__bounds - - def 
_wrap_joined_index(self, joined, other): - names = self.names if self.names == other.names else None - return MultiIndex.from_tuples(joined, names=names) - - @Appender(Index.isin.__doc__) - def isin(self, values, level=None): - if level is None: - return lib.ismember(np.array(self), set(values)) - else: - num = self._get_level_number(level) - levs = self.levels[num] - labs = self.labels[num] - - sought_labels = levs.isin(values).nonzero()[0] - if levs.size == 0: - return np.zeros(len(labs), dtype=np.bool_) - else: - return np.lib.arraysetops.in1d(labs, sought_labels) - - -MultiIndex._add_numeric_methods_disabled() -MultiIndex._add_logical_methods_disabled() - -# For utility purposes - - -def _sparsify(label_list, start=0, sentinel=''): - pivoted = lzip(*label_list) - k = len(label_list) - - result = pivoted[:start + 1] - prev = pivoted[start] - - for cur in pivoted[start + 1:]: - sparse_cur = [] - - for i, (p, t) in enumerate(zip(prev, cur)): - if i == k - 1: - sparse_cur.append(t) - result.append(sparse_cur) - break - - if p == t: - sparse_cur.append(sentinel) - else: - sparse_cur.extend(cur[i:]) - result.append(sparse_cur) - break - - prev = cur - - return lzip(*result) - - -def _ensure_index(index_like, copy=False): - if isinstance(index_like, Index): - if copy: - index_like = index_like.copy() - return index_like - if hasattr(index_like, 'name'): - return Index(index_like, name=index_like.name, copy=copy) - - # must check for exactly list here because of strict type - # check in clean_index_list - if isinstance(index_like, list): - if type(index_like) != list: - index_like = list(index_like) - # 2200 ? 
- converted, all_arrays = lib.clean_index_list(index_like) - - if len(converted) > 0 and all_arrays: - return MultiIndex.from_arrays(converted) - else: - index_like = converted - else: - # clean_index_list does the equivalent of copying - # so only need to do this if not list instance - if copy: - from copy import copy - index_like = copy(index_like) - - return Index(index_like) - - -def _ensure_frozen(array_like, categories, copy=False): - array_like = com._coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like - - -def _validate_join_method(method): - if method not in ['left', 'right', 'inner', 'outer']: - raise ValueError('do not recognize join method %s' % method) - - -# TODO: handle index names! -def _get_combined_index(indexes, intersect=False): - indexes = _get_distinct_indexes(indexes) - if len(indexes) == 0: - return Index([]) - if len(indexes) == 1: - return indexes[0] - if intersect: - index = indexes[0] - for other in indexes[1:]: - index = index.intersection(other) - return index - union = _union_indexes(indexes) - return _ensure_index(union) - - -def _get_distinct_indexes(indexes): - return list(dict((id(x), x) for x in indexes).values()) - - -def _union_indexes(indexes): - if len(indexes) == 0: - raise AssertionError('Must have at least 1 Index to union') - if len(indexes) == 1: - result = indexes[0] - if isinstance(result, list): - result = Index(sorted(result)) - return result - - indexes, kind = _sanitize_and_check(indexes) - - def _unique_indices(inds): - def conv(i): - if isinstance(i, Index): - i = i.tolist() - return i - - return Index(lib.fast_unique_multiple_list([conv(i) for i in inds])) - - if kind == 'special': - result = indexes[0] - - if hasattr(result, 'union_many'): - return result.union_many(indexes[1:]) - else: - for other in indexes[1:]: - result = result.union(other) - return result - elif kind == 'array': - index = indexes[0] - for 
other in indexes[1:]: - if not index.equals(other): - return _unique_indices(indexes) - - return index - else: - return _unique_indices(indexes) - - -def _trim_front(strings): - """ - Trims zeros and decimal points - """ - trimmed = strings - while len(strings) > 0 and all([x[0] == ' ' for x in trimmed]): - trimmed = [x[1:] for x in trimmed] - return trimmed - - -def _sanitize_and_check(indexes): - kinds = list(set([type(index) for index in indexes])) - - if list in kinds: - if len(kinds) > 1: - indexes = [Index(com._try_sort(x)) if not isinstance(x, Index) else - x for x in indexes] - kinds.remove(list) - else: - return indexes, 'list' - - if len(kinds) > 1 or Index not in kinds: - return indexes, 'special' - else: - return indexes, 'array' - - -def _get_consensus_names(indexes): - - # find the non-none names, need to tupleify to make - # the set hashable, then reverse on return - consensus_names = set([tuple(i.names) for i in indexes - if all(n is not None for n in i.names)]) - if len(consensus_names) == 1: - return list(list(consensus_names)[0]) - return [None] * indexes[0].nlevels - - -def _maybe_box(idx): - from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex - klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex - - if isinstance(idx, klasses): - return idx.asobject - return idx - - -def _all_indexes_same(indexes): - first = indexes[0] - for index in indexes[1:]: - if not first.equals(index): - return False - return True - - -def _get_na_rep(dtype): - return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN') - - -def _get_na_value(dtype): - return {np.datetime64: tslib.NaT, - np.timedelta64: tslib.NaT}.get(dtype, np.nan) - - -def _ensure_has_len(seq): - """If seq is an iterator, put its values into a list.""" - try: - len(seq) - except TypeError: - return list(seq) - else: - return seq +# flake8: noqa +from pandas.indexes.api import * +from pandas.indexes.multi import _sparsify diff --git a/pandas/indexes/__init__.py 
b/pandas/indexes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py new file mode 100644 index 0000000000000..8482cf325b47d --- /dev/null +++ b/pandas/indexes/api.py @@ -0,0 +1,115 @@ +from .base import (Index, # noqa + _ensure_index, _get_na_value, + InvalidIndexError) +from .category import CategoricalIndex # noqa +from .multi import MultiIndex # noqa +from .numeric import NumericIndex, Float64Index, Int64Index # noqa +from .range import RangeIndex # noqa + +import pandas.core.common as com +import pandas.lib as lib + + +__all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index', + 'CategoricalIndex', 'RangeIndex', + 'InvalidIndexError', + '_ensure_index', '_get_na_value', '_get_combined_index', + '_get_distinct_indexes', '_union_indexes', + '_get_consensus_names', + '_all_indexes_same'] + + +def _get_combined_index(indexes, intersect=False): + # TODO: handle index names! + indexes = _get_distinct_indexes(indexes) + if len(indexes) == 0: + return Index([]) + if len(indexes) == 1: + return indexes[0] + if intersect: + index = indexes[0] + for other in indexes[1:]: + index = index.intersection(other) + return index + union = _union_indexes(indexes) + return _ensure_index(union) + + +def _get_distinct_indexes(indexes): + return list(dict((id(x), x) for x in indexes).values()) + + +def _union_indexes(indexes): + if len(indexes) == 0: + raise AssertionError('Must have at least 1 Index to union') + if len(indexes) == 1: + result = indexes[0] + if isinstance(result, list): + result = Index(sorted(result)) + return result + + indexes, kind = _sanitize_and_check(indexes) + + def _unique_indices(inds): + def conv(i): + if isinstance(i, Index): + i = i.tolist() + return i + + return Index(lib.fast_unique_multiple_list([conv(i) for i in inds])) + + if kind == 'special': + result = indexes[0] + + if hasattr(result, 'union_many'): + return result.union_many(indexes[1:]) + else: + 
for other in indexes[1:]: + result = result.union(other) + return result + elif kind == 'array': + index = indexes[0] + for other in indexes[1:]: + if not index.equals(other): + return _unique_indices(indexes) + + return index + else: + return _unique_indices(indexes) + + +def _sanitize_and_check(indexes): + kinds = list(set([type(index) for index in indexes])) + + if list in kinds: + if len(kinds) > 1: + indexes = [Index(com._try_sort(x)) + if not isinstance(x, Index) else + x for x in indexes] + kinds.remove(list) + else: + return indexes, 'list' + + if len(kinds) > 1 or Index not in kinds: + return indexes, 'special' + else: + return indexes, 'array' + + +def _get_consensus_names(indexes): + + # find the non-none names, need to tupleify to make + # the set hashable, then reverse on return + consensus_names = set([tuple(i.names) for i in indexes + if all(n is not None for n in i.names)]) + if len(consensus_names) == 1: + return list(list(consensus_names)[0]) + return [None] * indexes[0].nlevels + + +def _all_indexes_same(indexes): + first = indexes[0] + for index in indexes[1:]: + if not first.equals(index): + return False + return True diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py new file mode 100644 index 0000000000000..0147000e4380c --- /dev/null +++ b/pandas/indexes/base.py @@ -0,0 +1,3309 @@ +import datetime +import warnings +import operator + +import numpy as np +import pandas.tslib as tslib +import pandas.lib as lib +import pandas.algos as _algos +import pandas.index as _index +from pandas.lib import Timestamp, Timedelta, is_datetime_array + +from pandas.compat import range, u +from pandas import compat +from pandas.core import algorithms +from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, + IndexOpsMixin) +import pandas.core.base as base +from pandas.util.decorators import (Appender, Substitution, cache_readonly, + deprecate, deprecate_kwarg) +import pandas.core.common as com +from pandas.core.missing import 
_clean_reindex_fill_method +from pandas.core.common import (isnull, array_equivalent, + is_object_dtype, is_datetimetz, ABCSeries, + ABCPeriodIndex, + _values_from_object, is_float, is_integer, + is_iterator, is_categorical_dtype, + _ensure_object, _ensure_int64, is_bool_indexer, + is_list_like, is_bool_dtype, + is_integer_dtype) +from pandas.core.strings import StringAccessorMixin + +from pandas.core.config import get_option + +# simplify +default_pprint = lambda x, max_seq_items=None: \ + com.pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True, + max_seq_items=max_seq_items) + +__all__ = ['Index'] + +_unsortable_types = frozenset(('mixed', 'mixed-integer')) + +_index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array') +_index_shared_docs = dict() + + +def _try_get_item(x): + try: + return x.item() + except AttributeError: + return x + + +class InvalidIndexError(Exception): + pass + + +_o_dtype = np.dtype(object) +_Identity = object + + +def _new_Index(cls, d): + """ This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ + """ + return cls.__new__(cls, **d) + + +class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): + """ + Immutable ndarray implementing an ordered, sliceable set. 
The basic object + storing axis labels for all pandas objects + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: object) + copy : bool + Make a copy of input ndarray + name : object + Name to be stored in the index + tupleize_cols : bool (default: True) + When True, attempt to create a MultiIndex if possible + + Notes + ----- + An Index instance can **only** contain hashable objects + """ + # To hand over control to subclasses + _join_precedence = 1 + + # Cython methods + _groupby = _algos.groupby_object + _arrmap = _algos.arrmap_object + _left_indexer_unique = _algos.left_join_indexer_unique_object + _left_indexer = _algos.left_join_indexer_object + _inner_indexer = _algos.inner_join_indexer_object + _outer_indexer = _algos.outer_join_indexer_object + _box_scalars = False + + _typ = 'index' + _data = None + _id = None + name = None + asi8 = None + _comparables = ['name'] + _attributes = ['name'] + _allow_index_ops = True + _allow_datetime_index_ops = False + _allow_period_index_ops = False + _is_numeric_dtype = False + _can_hold_na = True + + # prioritize current class for _shallow_copy_with_infer, + # used to infer integers as datetime-likes + _infer_as_myclass = False + + _engine_type = _index.ObjectEngine + + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False, tupleize_cols=True, **kwargs): + + if name is None and hasattr(data, 'name'): + name = data.name + + if fastpath: + return cls._simple_new(data, name) + + from .range import RangeIndex + + # range + if isinstance(data, RangeIndex): + return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) + elif isinstance(data, range): + return RangeIndex.from_range(data, copy=copy, dtype=dtype, + name=name) + + # categorical + if is_categorical_dtype(data) or is_categorical_dtype(dtype): + from .category import CategoricalIndex + return CategoricalIndex(data, copy=copy, name=name, **kwargs) + + # index-like + elif isinstance(data, 
(np.ndarray, Index, ABCSeries)): + + if (issubclass(data.dtype.type, np.datetime64) or + is_datetimetz(data)): + from pandas.tseries.index import DatetimeIndex + result = DatetimeIndex(data, copy=copy, name=name, **kwargs) + if dtype is not None and _o_dtype == dtype: + return Index(result.to_pydatetime(), dtype=_o_dtype) + else: + return result + + elif issubclass(data.dtype.type, np.timedelta64): + from pandas.tseries.tdi import TimedeltaIndex + result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) + if dtype is not None and _o_dtype == dtype: + return Index(result.to_pytimedelta(), dtype=_o_dtype) + else: + return result + + if dtype is not None: + try: + data = np.array(data, dtype=dtype, copy=copy) + except (TypeError, ValueError): + pass + + # maybe coerce to a sub-class + from pandas.tseries.period import PeriodIndex + if isinstance(data, PeriodIndex): + return PeriodIndex(data, copy=copy, name=name, **kwargs) + if issubclass(data.dtype.type, np.integer): + from .numeric import Int64Index + return Int64Index(data, copy=copy, dtype=dtype, name=name) + elif issubclass(data.dtype.type, np.floating): + from .numeric import Float64Index + return Float64Index(data, copy=copy, dtype=dtype, name=name) + elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): + subarr = data.astype('object') + else: + subarr = com._asarray_tuplesafe(data, dtype=object) + + # _asarray_tuplesafe does not always copy underlying data, + # so need to make sure that this happens + if copy: + subarr = subarr.copy() + + if dtype is None: + inferred = lib.infer_dtype(subarr) + if inferred == 'integer': + from .numeric import Int64Index + return Int64Index(subarr.astype('i8'), copy=copy, + name=name) + elif inferred in ['floating', 'mixed-integer-float']: + from .numeric import Float64Index + return Float64Index(subarr, copy=copy, name=name) + elif inferred == 'boolean': + # don't support boolean explicity ATM + pass + elif inferred != 'string': + if 
(inferred.startswith('datetime') or + tslib.is_timestamp_array(subarr)): + + if (lib.is_datetime_with_singletz_array(subarr) or + 'tz' in kwargs): + # only when subarr has the same tz + from pandas.tseries.index import DatetimeIndex + return DatetimeIndex(subarr, copy=copy, name=name, + **kwargs) + + elif (inferred.startswith('timedelta') or + lib.is_timedelta_array(subarr)): + from pandas.tseries.tdi import TimedeltaIndex + return TimedeltaIndex(subarr, copy=copy, name=name, + **kwargs) + elif inferred == 'period': + return PeriodIndex(subarr, name=name, **kwargs) + return cls._simple_new(subarr, name) + + elif hasattr(data, '__array__'): + return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, + **kwargs) + elif data is None or np.isscalar(data): + cls._scalar_data_error(data) + else: + if (tupleize_cols and isinstance(data, list) and data and + isinstance(data[0], tuple)): + + # we must be all tuples, otherwise don't construct + # 10697 + if all(isinstance(e, tuple) for e in data): + try: + # must be orderable in py3 + if compat.PY3: + sorted(data) + from .multi import MultiIndex + return MultiIndex.from_tuples( + data, names=name or kwargs.get('names')) + except (TypeError, KeyError): + # python2 - MultiIndex fails on mixed types + pass + # other iterable of some kind + subarr = com._asarray_tuplesafe(data, dtype=object) + return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) + + """ + NOTE for new Index creation: + + - _simple_new: It returns new Index with the same type as the caller. + All metadata (such as name) must be provided by caller's responsibility. + Using _shallow_copy is recommended because it fills these metadata + otherwise specified. + + - _shallow_copy: It returns new Index with the same type (using + _simple_new), but fills caller's metadata otherwise specified. Passed + kwargs will overwrite corresponding metadata. + + - _shallow_copy_with_infer: It returns new Index inferring its type + from passed values. 
It fills caller's metadata otherwise specified as the + same as _shallow_copy. + + See each method's docstring. + """ + + @classmethod + def _simple_new(cls, values, name=None, dtype=None, **kwargs): + """ + we require the we have a dtype compat for the values + if we are passed a non-dtype compat, then coerce using the constructor + + Must be careful not to recurse. + """ + if not hasattr(values, 'dtype'): + if values is None and dtype is not None: + values = np.empty(0, dtype=dtype) + else: + values = np.array(values, copy=False) + if is_object_dtype(values): + values = cls(values, name=name, dtype=dtype, + **kwargs)._values + + result = object.__new__(cls) + result._data = values + result.name = name + for k, v in compat.iteritems(kwargs): + setattr(result, k, v) + result._reset_identity() + return result + + def _shallow_copy(self, values=None, **kwargs): + """ + create a new Index with the same class as the caller, don't copy the + data, use the same object attributes with passed in attributes taking + precedence + + *this is an internal non-public method* + + Parameters + ---------- + values : the values to create the new Index, optional + kwargs : updates the default attributes for this Index + """ + if values is None: + values = self.values + attributes = self._get_attributes_dict() + attributes.update(kwargs) + return self._simple_new(values, **attributes) + + def _shallow_copy_with_infer(self, values=None, **kwargs): + """ + create a new Index inferring the class with passed value, don't copy + the data, use the same object attributes with passed in attributes + taking precedence + + *this is an internal non-public method* + + Parameters + ---------- + values : the values to create the new Index, optional + kwargs : updates the default attributes for this Index + """ + if values is None: + values = self.values + attributes = self._get_attributes_dict() + attributes.update(kwargs) + attributes['copy'] = False + if self._infer_as_myclass: + try: + return 
self._constructor(values, **attributes) + except (TypeError, ValueError): + pass + return Index(values, **attributes) + + def _update_inplace(self, result, **kwargs): + # guard when called from IndexOpsMixin + raise TypeError("Index can't be updated inplace") + + def is_(self, other): + """ + More flexible, faster check like ``is`` but that works through views + + Note: this is *not* the same as ``Index.identical()``, which checks + that metadata is also the same. + + Parameters + ---------- + other : object + other object to compare against. + + Returns + ------- + True if both have same underlying data, False otherwise : bool + """ + # use something other than None to be clearer + return self._id is getattr( + other, '_id', Ellipsis) and self._id is not None + + def _reset_identity(self): + """Initializes or resets ``_id`` attribute with new object""" + self._id = _Identity() + + # ndarray compat + def __len__(self): + """ + return the length of the Index + """ + return len(self._data) + + def __array__(self, dtype=None): + """ the array interface, return my values """ + return self._data.view(np.ndarray) + + def __array_wrap__(self, result, context=None): + """ + Gets called after a ufunc + """ + if is_bool_dtype(result): + return result + + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + return Index(result, **attrs) + + @cache_readonly + def dtype(self): + """ return the dtype object of the underlying data """ + return self._data.dtype + + @cache_readonly + def dtype_str(self): + """ return the dtype str of the underlying data """ + return str(self.dtype) + + @property + def values(self): + """ return the underlying data as an ndarray """ + return self._data.view(np.ndarray) + + def get_values(self): + """ return the underlying data as an ndarray """ + return self.values + + # ops compat + def tolist(self): + """ + return a list of the Index values + """ + return list(self.values) + + def repeat(self, n): + """ + return a 
new Index of the values repeated n times + + See also + -------- + numpy.ndarray.repeat + """ + return self._shallow_copy(self._values.repeat(n)) + + def ravel(self, order='C'): + """ + return an ndarray of the flattened values of the underlying data + + See also + -------- + numpy.ndarray.ravel + """ + return self._values.ravel(order=order) + + # construction helpers + @classmethod + def _scalar_data_error(cls, data): + raise TypeError('{0}(...) must be called with a collection of some ' + 'kind, {1} was passed'.format(cls.__name__, + repr(data))) + + @classmethod + def _string_data_error(cls, data): + raise TypeError('String dtype not supported, you may need ' + 'to explicitly cast to a numeric type') + + @classmethod + def _coerce_to_ndarray(cls, data): + """coerces data to ndarray, raises on scalar data. Converts other + iterables to list first and then to array. Does not touch ndarrays. + """ + + if not isinstance(data, (np.ndarray, Index)): + if data is None or np.isscalar(data): + cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (ABCSeries, list, tuple)): + data = list(data) + data = np.asarray(data) + return data + + def _get_attributes_dict(self): + """ return an attributes dict for my class """ + return dict([(k, getattr(self, k, None)) for k in self._attributes]) + + def view(self, cls=None): + + # we need to see if we are subclassing an + # index type here + if cls is not None and not hasattr(cls, '_typ'): + result = self._data.view(cls) + else: + result = self._shallow_copy() + if isinstance(result, Index): + result._id = self._id + return result + + def _coerce_scalar_to_index(self, item): + """ + we need to coerce a scalar to a compat for our index type + + Parameters + ---------- + item : scalar item to coerce + """ + return Index([item], dtype=self.dtype, **self._get_attributes_dict()) + + _index_shared_docs['copy'] = """ + Make a copy of this object. Name and dtype sets those attributes on + the new object. 
+ + Parameters + ---------- + name : string, optional + deep : boolean, default False + dtype : numpy dtype or pandas type + + Returns + ------- + copy : Index + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + """ + + @Appender(_index_shared_docs['copy']) + def copy(self, name=None, deep=False, dtype=None, **kwargs): + names = kwargs.get('names') + if names is not None and name is not None: + raise TypeError("Can only provide one of `names` and `name`") + if deep: + from copy import deepcopy + new_index = self._shallow_copy(self._data.copy()) + name = name or deepcopy(self.name) + else: + new_index = self._shallow_copy() + name = self.name + if name is not None: + names = [name] + if names: + new_index = new_index.set_names(names) + if dtype: + new_index = new_index.astype(dtype) + return new_index + + __copy__ = copy + + def __unicode__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. 
+ """ + klass = self.__class__.__name__ + data = self._format_data() + attrs = self._format_attrs() + space = self._format_space() + + prepr = (u(",%s") % + space).join([u("%s=%s") % (k, v) for k, v in attrs]) + + # no data provided, just attributes + if data is None: + data = '' + + res = u("%s(%s%s)") % (klass, data, prepr) + + return res + + def _format_space(self): + + # using space here controls if the attributes + # are line separated or not (the default) + + # max_seq_items = get_option('display.max_seq_items') + # if len(self) > max_seq_items: + # space = "\n%s" % (' ' * (len(klass) + 1)) + return " " + + @property + def _formatter_func(self): + """ + Return the formatted data as a unicode string + """ + return default_pprint + + def _format_data(self): + """ + Return the formatted data as a unicode string + """ + from pandas.core.format import get_console_size, _get_adjustment + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option('display.width') or 80 + + space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) + + n = len(self) + sep = ',' + max_seq_items = get_option('display.max_seq_items') or n + formatter = self._formatter_func + + # do we want to justify (only do so for non-objects) + is_justify = not (self.inferred_type in ('string', 'unicode') or + (self.inferred_type == 'categorical' and + is_object_dtype(self.categories))) + + # are we a truncated display + is_truncated = n > max_seq_items + + # adj can optionaly handle unicode eastern asian width + adj = _get_adjustment() + + def _extend_line(s, line, value, display_width, next_line_prefix): + + if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= + display_width): + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values): + if values: + return max([adj.len(x) for x in values]) + else: + return 0 + + if n == 0: + summary = '[], ' + elif 
n == 1: + first = formatter(self[0]) + summary = '[%s], ' % first + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary = '[%s, %s], ' % (first, last) + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in self[:n]] + tail = [formatter(x) for x in self[-n:]] + else: + head = [] + tail = [formatter(x) for x in self] + + # adjust all values to max length if needed + if is_justify: + + # however, if we are not truncated and we are only a single + # line, then don't justify + if (is_truncated or + not (len(', '.join(head)) < display_width and + len(', '.join(tail)) < display_width)): + max_len = max(best_len(head), best_len(tail)) + head = [x.rjust(max_len) for x in head] + tail = [x.rjust(max_len) for x in tail] + + summary = "" + line = space2 + + for i in range(len(head)): + word = head[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + if is_truncated: + # remove trailing space of last line + summary += line.rstrip() + space2 + '...' 
+ line = space2 + + for i in range(len(tail) - 1): + word = tail[i] + sep + ' ' + summary, line = _extend_line(summary, line, word, + display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], + display_width - 2, space2) + summary += line + summary += '],' + + if len(summary) > (display_width): + summary += space1 + else: # one row + summary += ' ' + + # remove initial space + summary = '[' + summary[len(space2):] + + return summary + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + attrs = [] + attrs.append(('dtype', "'%s'" % self.dtype)) + if self.name is not None: + attrs.append(('name', default_pprint(self.name))) + max_seq_items = get_option('display.max_seq_items') or len(self) + if len(self) > max_seq_items: + attrs.append(('length', len(self))) + return attrs + + def to_series(self, **kwargs): + """ + Create a Series with both index and values equal to the index keys + useful with map for returning an indexer based on an index + + Returns + ------- + Series : dtype will be based on the type of the Index values. 
+ """ + + from pandas import Series + return Series(self._to_embed(), index=self, name=self.name) + + def _to_embed(self, keep_tz=False): + """ + *this is an internal non-public method* + + return an array repr of this object, potentially casting to object + + """ + return self.values.copy() + + def astype(self, dtype): + return Index(self.values.astype(dtype), name=self.name, dtype=dtype) + + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self + + def to_datetime(self, dayfirst=False): + """ + For an Index containing strings or datetime.datetime objects, attempt + conversion to DatetimeIndex + """ + from pandas.tseries.index import DatetimeIndex + if self.inferred_type == 'string': + from dateutil.parser import parse + parser = lambda x: parse(x, dayfirst=dayfirst) + parsed = lib.try_parse_dates(self.values, parser=parser) + return DatetimeIndex(parsed) + else: + return DatetimeIndex(self.values) + + def _assert_can_do_setop(self, other): + if not com.is_list_like(other): + raise TypeError('Input must be Index or array-like') + return True + + def _convert_can_do_setop(self, other): + if not isinstance(other, Index): + other = Index(other, name=self.name) + result_name = self.name + else: + result_name = self.name if self.name == other.name else None + return other, result_name + + @property + def nlevels(self): + return 1 + + def _get_names(self): + return FrozenList((self.name, )) + + def _set_names(self, values, level=None): + if len(values) != 1: + raise ValueError('Length of new names must be 1, got %d' % + len(values)) + self.name = values[0] + + names = property(fset=_set_names, fget=_get_names) + + def set_names(self, names, level=None, inplace=False): + """ + Set new names on index. Defaults to returning new index. 
+ + Parameters + ---------- + names : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + inplace : bool + if True, mutates in place + + Returns + ------- + new index (of same type and class...etc) [if inplace, returns None] + + Examples + -------- + >>> Index([1, 2, 3, 4]).set_names('foo') + Int64Index([1, 2, 3, 4], dtype='int64') + >>> Index([1, 2, 3, 4]).set_names(['foo']) + Int64Index([1, 2, 3, 4], dtype='int64') + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')], + names=['foo', 'bar']) + >>> idx.set_names(['baz', 'quz']) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'baz', u'quz']) + >>> idx.set_names('baz', level=0) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'baz', u'bar']) + """ + if level is not None and self.nlevels == 1: + raise ValueError('Level must be None for non-MultiIndex') + + if level is not None and not is_list_like(level) and is_list_like( + names): + raise TypeError("Names must be a string") + + if not is_list_like(names) and level is None and self.nlevels > 1: + raise TypeError("Must pass list-like as `names`.") + + if not is_list_like(names): + names = [names] + if level is not None and not is_list_like(level): + level = [level] + + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._set_names(names, level=level) + if not inplace: + return idx + + def rename(self, name, inplace=False): + """ + Set new names on index. Defaults to returning new index. 
+ + Parameters + ---------- + name : str or list + name to set + inplace : bool + if True, mutates in place + + Returns + ------- + new index (of same type and class...etc) [if inplace, returns None] + """ + return self.set_names([name], inplace=inplace) + + @property + def _has_complex_internals(self): + # to disable groupby tricks in MultiIndex + return False + + def summary(self, name=None): + if len(self) > 0: + head = self[0] + if (hasattr(head, 'format') and + not isinstance(head, compat.string_types)): + head = head.format() + tail = self[-1] + if (hasattr(tail, 'format') and + not isinstance(tail, compat.string_types)): + tail = tail.format() + index_summary = ', %s to %s' % (com.pprint_thing(head), + com.pprint_thing(tail)) + else: + index_summary = '' + + if name is None: + name = type(self).__name__ + return '%s: %s entries%s' % (name, len(self), index_summary) + + def _mpl_repr(self): + # how to represent ourselves to matplotlib + return self.values + + _na_value = np.nan + """The expected NA value to use with this index.""" + + @property + def is_monotonic(self): + """ alias for is_monotonic_increasing (deprecated) """ + return self._engine.is_monotonic_increasing + + @property + def is_monotonic_increasing(self): + """ + return if the index is monotonic increasing (only equal or + increasing) values. + """ + return self._engine.is_monotonic_increasing + + @property + def is_monotonic_decreasing(self): + """ + return if the index is monotonic decreasing (only equal or + decreasing) values. 
+ """ + return self._engine.is_monotonic_decreasing + + def is_lexsorted_for_tuple(self, tup): + return True + + @cache_readonly(allow_setting=True) + def is_unique(self): + """ return if the index has unique values """ + return self._engine.is_unique + + @property + def has_duplicates(self): + return not self.is_unique + + def is_boolean(self): + return self.inferred_type in ['boolean'] + + def is_integer(self): + return self.inferred_type in ['integer'] + + def is_floating(self): + return self.inferred_type in ['floating', 'mixed-integer-float'] + + def is_numeric(self): + return self.inferred_type in ['integer', 'floating'] + + def is_object(self): + return is_object_dtype(self.dtype) + + def is_categorical(self): + return self.inferred_type in ['categorical'] + + def is_mixed(self): + return 'mixed' in self.inferred_type + + def holds_integer(self): + return self.inferred_type in ['integer', 'mixed-integer'] + + def _convert_scalar_indexer(self, key, kind=None): + """ + convert a scalar indexer + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + + right now we are converting + floats -> ints if the index supports it + """ + + def to_int(): + ikey = int(key) + if ikey != key: + return self._invalid_indexer('label', key) + return ikey + + if kind == 'iloc': + if is_integer(key): + return key + elif is_float(key): + key = to_int() + warnings.warn("scalar indexers for index type {0} should be " + "integers and not floating point".format( + type(self).__name__), + FutureWarning, stacklevel=5) + return key + return self._invalid_indexer('label', key) + + if is_float(key): + if isnull(key): + return self._invalid_indexer('label', key) + warnings.warn("scalar indexers for index type {0} should be " + "integers and not floating point".format( + type(self).__name__), + FutureWarning, stacklevel=3) + return to_int() + + return key + + def _convert_slice_indexer_getitem(self, key, 
is_index_slice=False): + """ called from the getitem slicers, determine how to treat the key + whether positional or not """ + if self.is_integer() or is_index_slice: + return key + return self._convert_slice_indexer(key) + + def _convert_slice_indexer(self, key, kind=None): + """ + convert a slice indexer. disallow floats in the start/stop/step + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ + + # if we are not a slice, then we are done + if not isinstance(key, slice): + return key + + # validate iloc + if kind == 'iloc': + + # need to coerce to_int if needed + def f(c): + v = getattr(key, c) + if v is None or is_integer(v): + return v + + # warn if it's a convertible float + if v == int(v): + warnings.warn("slice indexers when using iloc should be " + "integers and not floating point", + FutureWarning, stacklevel=7) + return int(v) + + self._invalid_indexer('slice {0} value'.format(c), v) + + return slice(*[f(c) for c in ['start', 'stop', 'step']]) + + # validate slicers + def validate(v): + if v is None or is_integer(v): + return True + + # dissallow floats (except for .ix) + elif is_float(v): + if kind == 'ix': + return True + + return False + + return True + + for c in ['start', 'stop', 'step']: + v = getattr(key, c) + if not validate(v): + self._invalid_indexer('slice {0} value'.format(c), v) + + # figure out if this is a positional indexer + start, stop, step = key.start, key.stop, key.step + + def is_int(v): + return v is None or is_integer(v) + + is_null_slicer = start is None and stop is None + is_index_slice = is_int(start) and is_int(stop) + is_positional = is_index_slice and not self.is_integer() + + if kind == 'getitem': + return self._convert_slice_indexer_getitem( + key, is_index_slice=is_index_slice) + + # convert the slice to an indexer here + + # if we are mixed and have integers + try: + if is_positional and self.is_mixed(): + # TODO: i, j are not used 
anywhere + if start is not None: + i = self.get_loc(start) # noqa + if stop is not None: + j = self.get_loc(stop) # noqa + is_positional = False + except KeyError: + if self.inferred_type == 'mixed-integer-float': + raise + + if is_null_slicer: + indexer = key + elif is_positional: + indexer = key + else: + try: + indexer = self.slice_indexer(start, stop, step) + except Exception: + if is_index_slice: + if self.is_integer(): + raise + else: + indexer = key + else: + raise + + return indexer + + def _convert_list_indexer(self, keyarr, kind=None): + """ + passed a key that is tuplesafe that is integer based + and we have a mixed index (e.g. number/labels). figure out + the indexer. return None if we can't help + """ + if (kind in [None, 'iloc', 'ix'] and + is_integer_dtype(keyarr) and not self.is_floating() and + not isinstance(keyarr, ABCPeriodIndex)): + + if self.inferred_type == 'mixed-integer': + indexer = self.get_indexer(keyarr) + if (indexer >= 0).all(): + return indexer + # missing values are flagged as -1 by get_indexer and negative + # indices are already converted to positive indices in the + # above if-statement, so the negative flags are changed to + # values outside the range of indices so as to trigger an + # IndexError in maybe_convert_indices + indexer[indexer < 0] = len(self) + from pandas.core.indexing import maybe_convert_indices + return maybe_convert_indices(indexer, len(self)) + + elif not self.inferred_type == 'integer': + keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) + return keyarr + + return None + + def _invalid_indexer(self, form, key): + """ consistent invalid indexer message """ + raise TypeError("cannot do {form} indexing on {klass} with these " + "indexers [{key}] of {kind}".format( + form=form, klass=type(self), key=key, + kind=type(key))) + + def get_duplicates(self): + from collections import defaultdict + counter = defaultdict(lambda: 0) + for k in self.values: + counter[k] += 1 + return sorted(k for k, v in 
compat.iteritems(counter) if v > 1) + + _get_duplicates = get_duplicates + + def _cleanup(self): + self._engine.clear_mapping() + + @cache_readonly + def _constructor(self): + return type(self) + + @cache_readonly + def _engine(self): + # property, for now, slow to look up + return self._engine_type(lambda: self.values, len(self)) + + def _validate_index_level(self, level): + """ + Validate index level. + + For single-level Index getting level number is a no-op, but some + verification must be done like in MultiIndex. + + """ + if isinstance(level, int): + if level < 0 and level != -1: + raise IndexError("Too many levels: Index has only 1 level," + " %d is not a valid level number" % (level, )) + elif level > 0: + raise IndexError("Too many levels:" + " Index has only 1 level, not %d" % + (level + 1)) + elif level != self.name: + raise KeyError('Level %s must be same as name (%s)' % + (level, self.name)) + + def _get_level_number(self, level): + self._validate_index_level(level) + return 0 + + @cache_readonly + def inferred_type(self): + """ return a string of the type inferred from the values """ + return lib.infer_dtype(self) + + def is_type_compatible(self, kind): + return kind == self.inferred_type + + @cache_readonly + def is_all_dates(self): + if self._data is None: + return False + return is_datetime_array(_ensure_object(self.values)) + + def __iter__(self): + return iter(self.values) + + def __reduce__(self): + d = dict(data=self._data) + d.update(self._get_attributes_dict()) + return _new_Index, (self.__class__, d), None + + def __setstate__(self, state): + """Necessary for making this object picklable""" + + if isinstance(state, dict): + self._data = state.pop('data') + for k, v in compat.iteritems(state): + setattr(self, k, v) + + elif isinstance(state, tuple): + + if len(state) == 2: + nd_state, own_state = state + data = np.empty(nd_state[1], dtype=nd_state[2]) + np.ndarray.__setstate__(data, nd_state) + self.name = own_state[0] + + else: # pragma: no 
cover + data = np.empty(state) + np.ndarray.__setstate__(data, state) + + self._data = data + self._reset_identity() + else: + raise Exception("invalid pickle state") + + _unpickle_compat = __setstate__ + + def __deepcopy__(self, memo=None): + if memo is None: + memo = {} + return self.copy(deep=True) + + def __nonzero__(self): + raise ValueError("The truth value of a {0} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + .format(self.__class__.__name__)) + + __bool__ = __nonzero__ + + def __contains__(self, key): + hash(key) + # work around some kind of odd cython bug + try: + return key in self._engine + except TypeError: + return False + + def __hash__(self): + raise TypeError("unhashable type: %r" % type(self).__name__) + + def __setitem__(self, key, value): + raise TypeError("Index does not support mutable operations") + + def __getitem__(self, key): + """ + Override numpy.ndarray's __getitem__ method to work as desired. + + This function adds lists and Series as valid boolean indexers + (ndarrays only supports ndarray with dtype=bool). + + If resulting ndim != 1, plain ndarray is returned instead of + corresponding `Index` subclass. + + """ + # There's no custom logic to be implemented in __getslice__, so it's + # not overloaded intentionally. + getitem = self._data.__getitem__ + promote = self._shallow_copy + + if np.isscalar(key): + return getitem(key) + + if isinstance(key, slice): + # This case is separated from the conditional above to avoid + # pessimization of basic indexing. 
+ return promote(getitem(key)) + + if is_bool_indexer(key): + key = np.asarray(key) + + key = _values_from_object(key) + result = getitem(key) + if not np.isscalar(result): + return promote(result) + else: + return result + + def _ensure_compat_append(self, other): + """ + prepare the append + + Returns + ------- + list of to_concat, name of result Index + """ + name = self.name + to_concat = [self] + + if isinstance(other, (list, tuple)): + to_concat = to_concat + list(other) + else: + to_concat.append(other) + + for obj in to_concat: + if (isinstance(obj, Index) and obj.name != name and + obj.name is not None): + name = None + break + + to_concat = self._ensure_compat_concat(to_concat) + to_concat = [x._values if isinstance(x, Index) else x + for x in to_concat] + return to_concat, name + + def append(self, other): + """ + Append a collection of Index options together + + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + appended : Index + """ + to_concat, name = self._ensure_compat_append(other) + attribs = self._get_attributes_dict() + attribs['name'] = name + return self._shallow_copy_with_infer( + np.concatenate(to_concat), **attribs) + + @staticmethod + def _ensure_compat_concat(indexes): + from pandas.tseries.api import (DatetimeIndex, PeriodIndex, + TimedeltaIndex) + klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex + + is_ts = [isinstance(idx, klasses) for idx in indexes] + + if any(is_ts) and not all(is_ts): + return [_maybe_box(idx) for idx in indexes] + + return indexes + + def take(self, indices, axis=0, allow_fill=True, fill_value=None): + """ + return a new Index of the values selected by the indexer + + For internal compatibility with numpy arrays. 
+ + # filling must always be None/nan here + # but is passed thru internally + + See also + -------- + numpy.ndarray.take + """ + + indices = com._ensure_platform_int(indices) + taken = self.values.take(indices) + return self._shallow_copy(taken) + + @cache_readonly + def _isnan(self): + """ return if each value is nan""" + if self._can_hold_na: + return isnull(self) + else: + # shouldn't reach to this condition by checking hasnans beforehand + values = np.empty(len(self), dtype=np.bool_) + values.fill(False) + return values + + @cache_readonly + def _nan_idxs(self): + if self._can_hold_na: + w, = self._isnan.nonzero() + return w + else: + return np.array([], dtype=np.int64) + + @cache_readonly + def hasnans(self): + """ return if I have any nans; enables various perf speedups """ + if self._can_hold_na: + return self._isnan.any() + else: + return False + + def _convert_for_op(self, value): + """ Convert value to be insertable to ndarray """ + return value + + def _assert_can_do_op(self, value): + """ Check value is valid for scalar op """ + if not lib.isscalar(value): + msg = "'value' must be a scalar, passed: {0}" + raise TypeError(msg.format(type(value).__name__)) + + def putmask(self, mask, value): + """ + return a new Index of the values set with the mask + + See also + -------- + numpy.ndarray.putmask + """ + values = self.values.copy() + try: + np.putmask(values, mask, self._convert_for_op(value)) + return self._shallow_copy(values) + except (ValueError, TypeError): + # coerces to object + return self.astype(object).putmask(mask, value) + + def format(self, name=False, formatter=None, **kwargs): + """ + Render a string representation of the Index + """ + header = [] + if name: + header.append(com.pprint_thing(self.name, + escape_chars=('\t', '\r', '\n')) if + self.name is not None else '') + + if formatter is not None: + return header + list(self.map(formatter)) + + return self._format_with_header(header, **kwargs) + + def _format_with_header(self, header, 
na_rep='NaN', **kwargs): + values = self.values + + from pandas.core.format import format_array + + if is_categorical_dtype(values.dtype): + values = np.array(values) + elif is_object_dtype(values.dtype): + values = lib.maybe_convert_objects(values, safe=1) + + if is_object_dtype(values.dtype): + result = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n')) + for x in values] + + # could have nans + mask = isnull(values) + if mask.any(): + result = np.array(result) + result[mask] = na_rep + result = result.tolist() + + else: + result = _trim_front(format_array(values, None, justify='left')) + return header + result + + def to_native_types(self, slicer=None, **kwargs): + """ slice and dice then format """ + values = self + if slicer is not None: + values = values[slicer] + return values._format_native_types(**kwargs) + + def _format_native_types(self, na_rep='', quoting=None, **kwargs): + """ actually format my specific types """ + mask = isnull(self) + if not self.is_object() and not quoting: + values = np.asarray(self).astype(str) + else: + values = np.array(self, dtype=object, copy=True) + + values[mask] = na_rep + return values + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self.is_(other): + return True + + if not isinstance(other, Index): + return False + + return array_equivalent(_values_from_object(self), + _values_from_object(other)) + + def identical(self, other): + """Similar to equals, but check that other comparable attributes are + also equal + """ + return (self.equals(other) and + all((getattr(self, c, None) == getattr(other, c, None) + for c in self._comparables)) and + type(self) == type(other)) + + def asof(self, label): + """ + For a sorted index, return the most recent label up to and including + the passed label. Return NaN if not found. 
+ + See also + -------- + get_loc : asof is a thin wrapper around get_loc with method='pad' + """ + try: + loc = self.get_loc(label, method='pad') + except KeyError: + return _get_na_value(self.dtype) + else: + if isinstance(loc, slice): + loc = loc.indices(len(self))[-1] + return self[loc] + + def asof_locs(self, where, mask): + """ + where : array of timestamps + mask : array of booleans where data is not NA + + """ + locs = self.values[mask].searchsorted(where.values, side='right') + + locs = np.where(locs > 0, locs - 1, 0) + result = np.arange(len(self))[mask].take(locs) + + first = mask.argmax() + result[(locs == 0) & (where < self.values[first])] = -1 + + return result + + def sort_values(self, return_indexer=False, ascending=True): + """ + Return sorted copy of Index + """ + _as = self.argsort() + if not ascending: + _as = _as[::-1] + + sorted_index = self.take(_as) + + if return_indexer: + return sorted_index, _as + else: + return sorted_index + + def order(self, return_indexer=False, ascending=True): + """ + Return sorted copy of Index + + DEPRECATED: use :meth:`Index.sort_values` + """ + warnings.warn("order is deprecated, use sort_values(...)", + FutureWarning, stacklevel=2) + return self.sort_values(return_indexer=return_indexer, + ascending=ascending) + + def sort(self, *args, **kwargs): + raise TypeError("cannot sort an Index object in-place, use " + "sort_values instead") + + def sortlevel(self, level=None, ascending=True, sort_remaining=None): + """ + + For internal compatibility with with the Index API + + Sort the Index. 
This is for compat with MultiIndex + + Parameters + ---------- + ascending : boolean, default True + False to sort in descending order + + level, sort_remaining are compat parameters + + Returns + ------- + sorted_index : Index + """ + return self.sort_values(return_indexer=True, ascending=ascending) + + def shift(self, periods=1, freq=None): + """ + Shift Index containing datetime objects by input number of periods and + DateOffset + + Returns + ------- + shifted : Index + """ + raise NotImplementedError("Not supported for type %s" % + type(self).__name__) + + def argsort(self, *args, **kwargs): + """ + return an ndarray indexer of the underlying data + + See also + -------- + numpy.ndarray.argsort + """ + result = self.asi8 + if result is None: + result = np.array(self) + return result.argsort(*args, **kwargs) + + def __add__(self, other): + if com.is_list_like(other): + warnings.warn("using '+' to provide set union with Indexes is " + "deprecated, use '|' or .union()", FutureWarning, + stacklevel=2) + if isinstance(other, Index): + return self.union(other) + return Index(np.array(self) + other) + + def __radd__(self, other): + if is_list_like(other): + warnings.warn("using '+' to provide set union with Indexes is " + "deprecated, use '|' or .union()", FutureWarning, + stacklevel=2) + return Index(other + np.array(self)) + + __iadd__ = __add__ + + def __sub__(self, other): + warnings.warn("using '-' to provide set differences with Indexes is " + "deprecated, use .difference()", FutureWarning, + stacklevel=2) + return self.difference(other) + + def __and__(self, other): + return self.intersection(other) + + def __or__(self, other): + return self.union(other) + + def __xor__(self, other): + return self.sym_diff(other) + + def union(self, other): + """ + Form the union of two Index objects and sorts if possible. 
+ + Parameters + ---------- + other : Index or array-like + + Returns + ------- + union : Index + + Examples + -------- + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.union(idx2) + Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + + """ + self._assert_can_do_setop(other) + other = _ensure_index(other) + + if len(other) == 0 or self.equals(other): + return self + + if len(self) == 0: + return other + + if not com.is_dtype_equal(self.dtype, other.dtype): + this = self.astype('O') + other = other.astype('O') + return this.union(other) + + if self.is_monotonic and other.is_monotonic: + try: + result = self._outer_indexer(self.values, other._values)[0] + except TypeError: + # incomparable objects + result = list(self.values) + + # worth making this faster? a very unusual case + value_set = set(self.values) + result.extend([x for x in other._values if x not in value_set]) + else: + indexer = self.get_indexer(other) + indexer, = (indexer == -1).nonzero() + + if len(indexer) > 0: + other_diff = com.take_nd(other._values, indexer, + allow_fill=False) + result = com._concat_compat((self.values, other_diff)) + + try: + self.values[0] < other_diff[0] + except TypeError as e: + warnings.warn("%s, sort order is undefined for " + "incomparable objects" % e, RuntimeWarning, + stacklevel=3) + else: + types = frozenset((self.inferred_type, + other.inferred_type)) + if not types & _unsortable_types: + result.sort() + + else: + result = self.values + + try: + result = np.sort(result) + except TypeError as e: + warnings.warn("%s, sort order is undefined for " + "incomparable objects" % e, RuntimeWarning, + stacklevel=3) + + # for subclasses + return self._wrap_union_result(other, result) + + def _wrap_union_result(self, other, result): + name = self.name if self.name == other.name else None + return self.__class__(result, name=name) + + def intersection(self, other): + """ + Form the intersection of two Index objects. 
+ + This returns a new Index with elements common to the index and `other`. + Sortedness of the result is not guaranteed. + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + intersection : Index + + Examples + -------- + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.intersection(idx2) + Int64Index([3, 4], dtype='int64') + + """ + self._assert_can_do_setop(other) + other = _ensure_index(other) + + if self.equals(other): + return self + + if not com.is_dtype_equal(self.dtype, other.dtype): + this = self.astype('O') + other = other.astype('O') + return this.intersection(other) + + if self.is_monotonic and other.is_monotonic: + try: + result = self._inner_indexer(self.values, other._values)[0] + return self._wrap_union_result(other, result) + except TypeError: + pass + + try: + indexer = Index(self.values).get_indexer(other._values) + indexer = indexer.take((indexer != -1).nonzero()[0]) + except: + # duplicates + indexer = Index(self.values).get_indexer_non_unique( + other._values)[0].unique() + indexer = indexer[indexer != -1] + + taken = self.take(indexer) + if self.name != other.name: + taken.name = None + return taken + + def difference(self, other): + """ + Return a new Index with elements from the index that are not in + `other`. + + This is the sorted set difference of two Index objects. 
+ + Parameters + ---------- + other : Index or array-like + + Returns + ------- + difference : Index + + Examples + -------- + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.difference(idx2) + Int64Index([1, 2], dtype='int64') + + """ + self._assert_can_do_setop(other) + + if self.equals(other): + return Index([], name=self.name) + + other, result_name = self._convert_can_do_setop(other) + + theDiff = sorted(set(self) - set(other)) + return Index(theDiff, name=result_name) + + diff = deprecate('diff', difference) + + def sym_diff(self, other, result_name=None): + """ + Compute the sorted symmetric difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + result_name : str + + Returns + ------- + sym_diff : Index + + Notes + ----- + ``sym_diff`` contains elements that appear in either ``idx1`` or + ``idx2`` but not both. Equivalent to the Index created by + ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped. + + The sorting of a result containing ``NaN`` values is not guaranteed + across Python versions. See GitHub issue #6444. + + Examples + -------- + >>> idx1 = Index([1, 2, 3, 4]) + >>> idx2 = Index([2, 3, 4, 5]) + >>> idx1.sym_diff(idx2) + Int64Index([1, 5], dtype='int64') + + You can also use the ``^`` operator: + + >>> idx1 ^ idx2 + Int64Index([1, 5], dtype='int64') + """ + self._assert_can_do_setop(other) + other, result_name_update = self._convert_can_do_setop(other) + if result_name is None: + result_name = result_name_update + + the_diff = sorted(set((self.difference(other)). 
+ union(other.difference(self)))) + attribs = self._get_attributes_dict() + attribs['name'] = result_name + if 'freq' in attribs: + attribs['freq'] = None + return self._shallow_copy_with_infer(the_diff, **attribs) + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label + + Parameters + ---------- + key : label + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + tolerance : optional + Maximum distance from index value for inexact matches. The value of + the index at the matching location most satisfy the equation + ``abs(index[loc] - key) <= tolerance``. + + .. versionadded:: 0.17.0 + + Returns + ------- + loc : int if unique index, possibly slice or mask if not + """ + if method is None: + if tolerance is not None: + raise ValueError('tolerance argument only valid if using pad, ' + 'backfill or nearest lookups') + key = _values_from_object(key) + return self._engine.get_loc(key) + + indexer = self.get_indexer([key], method=method, tolerance=tolerance) + if indexer.ndim > 1 or indexer.size > 1: + raise TypeError('get_loc requires scalar valued input') + loc = indexer.item() + if loc == -1: + raise KeyError(key) + return loc + + def get_value(self, series, key): + """ + Fast lookup of value from 1-dimensional ndarray. Only use this if you + know what you're doing + """ + + # if we have something that is Index-like, then + # use this, e.g. 
DatetimeIndex + s = getattr(series, '_values', None) + if isinstance(s, Index) and lib.isscalar(key): + return s[key] + + s = _values_from_object(series) + k = _values_from_object(key) + + # prevent integer truncation bug in indexing + if is_float(k) and not self.is_floating(): + raise KeyError + + try: + return self._engine.get_value(s, k) + except KeyError as e1: + if len(self) > 0 and self.inferred_type in ['integer', 'boolean']: + raise + + try: + return tslib.get_value_box(s, key) + except IndexError: + raise + except TypeError: + # generator/iterator-like + if is_iterator(key): + raise InvalidIndexError(key) + else: + raise e1 + except Exception: # pragma: no cover + raise e1 + except TypeError: + # python 3 + if np.isscalar(key): # pragma: no cover + raise IndexError(key) + raise InvalidIndexError(key) + + def set_value(self, arr, key, value): + """ + Fast lookup of value from 1-dimensional ndarray. Only use this if you + know what you're doing + """ + self._engine.set_value(_values_from_object(arr), + _values_from_object(key), value) + + def get_level_values(self, level): + """ + Return vector of label values for requested level, equal to the length + of the index + + Parameters + ---------- + level : int + + Returns + ------- + values : ndarray + """ + # checks that level number is actually just 1 + self._validate_index_level(level) + return self + + def get_indexer(self, target, method=None, limit=None, tolerance=None): + """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : Index + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. 
Tied + distances are broken by preferring the larger index value. + limit : int, optional + Maximum number of consecutive labels in ``target`` to match for + inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + .. versionadded:: 0.17.0 + + Examples + -------- + >>> indexer = index.get_indexer(new_index) + >>> new_values = cur_values.take(indexer) + + Returns + ------- + indexer : ndarray of int + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + """ + method = _clean_reindex_fill_method(method) + target = _ensure_index(target) + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance) + + pself, ptarget = self._possibly_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer(ptarget, method=method, limit=limit, + tolerance=tolerance) + + if not com.is_dtype_equal(self.dtype, target.dtype): + this = self.astype(object) + target = target.astype(object) + return this.get_indexer(target, method=method, limit=limit, + tolerance=tolerance) + + if not self.is_unique: + raise InvalidIndexError('Reindexing only valid with uniquely' + ' valued Index objects') + + if method == 'pad' or method == 'backfill': + indexer = self._get_fill_indexer(target, method, limit, tolerance) + elif method == 'nearest': + indexer = self._get_nearest_indexer(target, limit, tolerance) + else: + if tolerance is not None: + raise ValueError('tolerance argument only valid if doing pad, ' + 'backfill or nearest reindexing') + if limit is not None: + raise ValueError('limit argument only valid if doing pad, ' + 'backfill or nearest reindexing') + + indexer = self._engine.get_indexer(target._values) + + return com._ensure_platform_int(indexer) + + 
def _convert_tolerance(self, tolerance): + # override this method on subclasses + return tolerance + + def _get_fill_indexer(self, target, method, limit=None, tolerance=None): + if self.is_monotonic_increasing and target.is_monotonic_increasing: + method = (self._engine.get_pad_indexer if method == 'pad' else + self._engine.get_backfill_indexer) + indexer = method(target._values, limit) + else: + indexer = self._get_fill_indexer_searchsorted(target, method, + limit) + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target._values, indexer, + tolerance) + return indexer + + def _get_fill_indexer_searchsorted(self, target, method, limit=None): + """ + Fallback pad/backfill get_indexer that works for monotonic decreasing + indexes and non-monotonic targets + """ + if limit is not None: + raise ValueError('limit argument for %r method only well-defined ' + 'if index and target are monotonic' % method) + + side = 'left' if method == 'pad' else 'right' + target = np.asarray(target) + + # find exact matches first (this simplifies the algorithm) + indexer = self.get_indexer(target) + nonexact = (indexer == -1) + indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], + side) + if side == 'left': + # searchsorted returns "indices into a sorted array such that, + # if the corresponding elements in v were inserted before the + # indices, the order of a would be preserved". + # Thus, we need to subtract 1 to find values to the left. + indexer[nonexact] -= 1 + # This also mapped not found values (values of 0 from + # np.searchsorted) to -1, which conveniently is also our + # sentinel for missing values + else: + # Mark indices to the right of the largest value as not found + indexer[indexer == len(self)] = -1 + return indexer + + def _get_nearest_indexer(self, target, limit, tolerance): + """ + Get the indexer for the nearest index labels; requires an index with + values that can be subtracted from each other (e.g., not strings or + tuples). 
+ """ + left_indexer = self.get_indexer(target, 'pad', limit=limit) + right_indexer = self.get_indexer(target, 'backfill', limit=limit) + + target = np.asarray(target) + left_distances = abs(self.values[left_indexer] - target) + right_distances = abs(self.values[right_indexer] - target) + + op = operator.lt if self.is_monotonic_increasing else operator.le + indexer = np.where(op(left_distances, right_distances) | + (right_indexer == -1), left_indexer, right_indexer) + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target, indexer, + tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + distance = abs(self.values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + + def get_indexer_non_unique(self, target): + """ return an indexer suitable for taking from a non unique index + return the labels in the same order as the target, and + return a missing indexer into the target (missing are marked as -1 + in the indexer); target must be an iterable """ + target = _ensure_index(target) + pself, ptarget = self._possibly_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer_non_unique(ptarget) + + if self.is_all_dates: + self = Index(self.asi8) + tgt_values = target.asi8 + else: + tgt_values = target._values + + indexer, missing = self._engine.get_indexer_non_unique(tgt_values) + return Index(indexer), missing + + def get_indexer_for(self, target, **kwargs): + """ guaranteed return of an indexer even when non-unique """ + if self.is_unique: + return self.get_indexer(target, **kwargs) + indexer, _ = self.get_indexer_non_unique(target, **kwargs) + return indexer + + def _possibly_promote(self, other): + # A hack, but it works + from pandas.tseries.index import DatetimeIndex + if self.inferred_type == 'date' and isinstance(other, DatetimeIndex): + return DatetimeIndex(self), other + elif self.inferred_type == 'boolean': + if not 
is_object_dtype(self.dtype): + return self.astype('object'), other.astype('object') + return self, other + + def groupby(self, to_groupby): + """ + Group the index labels by a given array of values. + + Parameters + ---------- + to_groupby : array + Values used to determine the groups. + + Returns + ------- + groups : dict + {group name -> group labels} + + """ + return self._groupby(self.values, _values_from_object(to_groupby)) + + def map(self, mapper): + return self._arrmap(self.values, mapper) + + def isin(self, values, level=None): + """ + Compute boolean array of whether each index value is found in the + passed set of values. + + Parameters + ---------- + values : set or sequence of values + Sought values. + level : str or int, optional + Name or position of the index level to use (if the index is a + MultiIndex). + + Notes + ----- + If `level` is specified: + + - if it is the name of one *and only one* index level, use that level; + - otherwise it should be a number indicating level position. 
+ + Returns + ------- + is_contained : ndarray (boolean dtype) + + """ + if level is not None: + self._validate_index_level(level) + return algorithms.isin(np.array(self), values) + + def _can_reindex(self, indexer): + """ + *this is an internal non-public method* + + Check if we are allowing reindexing with this particular indexer + + Parameters + ---------- + indexer : an integer indexer + + Raises + ------ + ValueError if its a duplicate axis + """ + + # trying to reindex on an axis with duplicates + if not self.is_unique and len(indexer): + raise ValueError("cannot reindex from a duplicate axis") + + def reindex(self, target, method=None, level=None, limit=None, + tolerance=None): + """ + Create index with target's values (move/add/delete values as necessary) + + Parameters + ---------- + target : an iterable + + Returns + ------- + new_index : pd.Index + Resulting index + indexer : np.ndarray or None + Indices of output values in original index + + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, 'name') + + # GH7774: preserve dtype/tz if target is empty and not an Index. 
+ target = _ensure_has_len(target) # target may be an iterator + + if not isinstance(target, Index) and len(target) == 0: + attrs = self._get_attributes_dict() + attrs.pop('freq', None) # don't preserve freq + target = self._simple_new(None, dtype=self.dtype, **attrs) + else: + target = _ensure_index(target) + + if level is not None: + if method is not None: + raise TypeError('Fill method not supported if level passed') + _, indexer, _ = self._join_level(target, level, how='right', + return_indexers=True) + else: + if self.equals(target): + indexer = None + else: + if self.is_unique: + indexer = self.get_indexer(target, method=method, + limit=limit, + tolerance=tolerance) + else: + if method is not None or limit is not None: + raise ValueError("cannot reindex a non-unique index " + "with a method or limit") + indexer, missing = self.get_indexer_non_unique(target) + + if preserve_names and target.nlevels == 1 and target.name != self.name: + target = target.copy() + target.name = self.name + + return target, indexer + + def _reindex_non_unique(self, target): + """ + *this is an internal non-public method* + + Create a new index with target's values (move/add/delete values as + necessary) use with non-unique Index and a possibly non-unique target + + Parameters + ---------- + target : an iterable + + Returns + ------- + new_index : pd.Index + Resulting index + indexer : np.ndarray or None + Indices of output values in original index + + """ + + target = _ensure_index(target) + indexer, missing = self.get_indexer_non_unique(target) + check = indexer != -1 + new_labels = self.take(indexer[check]) + new_indexer = None + + if len(missing): + l = np.arange(len(indexer)) + + missing = com._ensure_platform_int(missing) + missing_labels = target.take(missing) + missing_indexer = _ensure_int64(l[~check]) + cur_labels = self.take(indexer[check])._values + cur_indexer = _ensure_int64(l[check]) + + new_labels = np.empty(tuple([len(indexer)]), dtype=object) + 
new_labels[cur_indexer] = cur_labels + new_labels[missing_indexer] = missing_labels + + # a unique indexer + if target.is_unique: + + # see GH5553, make sure we use the right indexer + new_indexer = np.arange(len(indexer)) + new_indexer[cur_indexer] = np.arange(len(cur_labels)) + new_indexer[missing_indexer] = -1 + + # we have a non_unique selector, need to use the original + # indexer here + else: + + # need to retake to have the same size as the indexer + indexer = indexer._values + indexer[~check] = 0 + + # reset the new indexer to account for the new size + new_indexer = np.arange(len(self.take(indexer))) + new_indexer[~check] = -1 + + new_index = self._shallow_copy_with_infer(new_labels, freq=None) + return new_index, indexer, new_indexer + + def join(self, other, how='left', level=None, return_indexers=False): + """ + *this is an internal non-public method* + + Compute join_index and indexers to conform data + structures to the new index. + + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : boolean, default False + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + from .multi import MultiIndex + self_is_mi = isinstance(self, MultiIndex) + other_is_mi = isinstance(other, MultiIndex) + + # try to figure out the join level + # GH3662 + if level is None and (self_is_mi or other_is_mi): + + # have the same levels/names so a simple join + if self.names == other.names: + pass + else: + return self._join_multi(other, how=how, + return_indexers=return_indexers) + + # join on the level + if level is not None and (self_is_mi or other_is_mi): + return self._join_level(other, level, how=how, + return_indexers=return_indexers) + + other = _ensure_index(other) + + if len(other) == 0 and how in ('left', 'outer'): + join_index = self._shallow_copy() + if return_indexers: + rindexer = np.repeat(-1, len(join_index)) + return join_index, None, rindexer + else: + 
return join_index + + if len(self) == 0 and how in ('right', 'outer'): + join_index = other._shallow_copy() + if return_indexers: + lindexer = np.repeat(-1, len(join_index)) + return join_index, lindexer, None + else: + return join_index + + if self._join_precedence < other._join_precedence: + how = {'right': 'left', 'left': 'right'}.get(how, how) + result = other.join(self, how=how, level=level, + return_indexers=return_indexers) + if return_indexers: + x, y, z = result + result = x, z, y + return result + + if not com.is_dtype_equal(self.dtype, other.dtype): + this = self.astype('O') + other = other.astype('O') + return this.join(other, how=how, return_indexers=return_indexers) + + _validate_join_method(how) + + if not self.is_unique and not other.is_unique: + return self._join_non_unique(other, how=how, + return_indexers=return_indexers) + elif not self.is_unique or not other.is_unique: + if self.is_monotonic and other.is_monotonic: + return self._join_monotonic(other, how=how, + return_indexers=return_indexers) + else: + return self._join_non_unique(other, how=how, + return_indexers=return_indexers) + elif self.is_monotonic and other.is_monotonic: + try: + return self._join_monotonic(other, how=how, + return_indexers=return_indexers) + except TypeError: + pass + + if how == 'left': + join_index = self + elif how == 'right': + join_index = other + elif how == 'inner': + join_index = self.intersection(other) + elif how == 'outer': + join_index = self.union(other) + + if return_indexers: + if join_index is self: + lindexer = None + else: + lindexer = self.get_indexer(join_index) + if join_index is other: + rindexer = None + else: + rindexer = other.get_indexer(join_index) + return join_index, lindexer, rindexer + else: + return join_index + + def _join_multi(self, other, how, return_indexers=True): + from .multi import MultiIndex + self_is_mi = isinstance(self, MultiIndex) + other_is_mi = isinstance(other, MultiIndex) + + # figure out join names + self_names = [n 
for n in self.names if n is not None] + other_names = [n for n in other.names if n is not None] + overlap = list(set(self_names) & set(other_names)) + + # need at least 1 in common, but not more than 1 + if not len(overlap): + raise ValueError("cannot join with no level specified and no " + "overlapping names") + if len(overlap) > 1: + raise NotImplementedError("merging with more than one level " + "overlap on a multi-index is not " + "implemented") + jl = overlap[0] + + # make the indices into mi's that match + if not (self_is_mi and other_is_mi): + + flip_order = False + if self_is_mi: + self, other = other, self + flip_order = True + # flip if join method is right or left + how = {'right': 'left', 'left': 'right'}.get(how, how) + + level = other.names.index(jl) + result = self._join_level(other, level, how=how, + return_indexers=return_indexers) + + if flip_order: + if isinstance(result, tuple): + return result[0], result[2], result[1] + return result + + # 2 multi-indexes + raise NotImplementedError("merging with both multi-indexes is not " + "implemented") + + def _join_non_unique(self, other, how='left', return_indexers=False): + from pandas.tools.merge import _get_join_indexers + + left_idx, right_idx = _get_join_indexers([self.values], + [other._values], how=how, + sort=True) + + left_idx = com._ensure_platform_int(left_idx) + right_idx = com._ensure_platform_int(right_idx) + + join_index = self.values.take(left_idx) + mask = left_idx == -1 + np.putmask(join_index, mask, other._values.take(right_idx)) + + join_index = self._wrap_joined_index(join_index, other) + + if return_indexers: + return join_index, left_idx, right_idx + else: + return join_index + + def _join_level(self, other, level, how='left', return_indexers=False, + keep_order=True): + """ + The join method *only* affects the level of the resulting + MultiIndex. Otherwise it just exactly aligns the Index data to the + labels of the level in the MultiIndex. 
    def _join_level(self, other, level, how='left', return_indexers=False,
                    keep_order=True):
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex. If `keep_order` == True, the
        order of the data indexed by the MultiIndex will not be changed;
        otherwise, it will tie out with `other`.
        """
        from pandas.algos import groupsort_indexer
        from .multi import MultiIndex

        def _get_leaf_sorter(labels):
            '''
            returns sorter for the inner most level while preserving the
            order of higher levels
            '''
            if labels[0].size == 0:
                return np.empty(0, dtype='int64')

            if len(labels) == 1:
                lab = _ensure_int64(labels[0])
                sorter, _ = groupsort_indexer(lab, 1 + lab.max())
                return sorter

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = _ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, _ensure_int64(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError('Join on level between two MultiIndex objects '
                            'is ambiguous')

        left, right = self, other

        # normalize so `left` is always the MultiIndex side
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            how = {'right': 'left', 'left': 'right'}.get(how, how)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError('Index._join_level on non-unique index '
                                      'is not implemented')

        # join the flat index against the chosen level's values
        new_level, left_lev_indexer, right_lev_indexer = \
            old_level.join(right, how=how, return_indexers=True)

        if left_lev_indexer is None:
            # the level is unchanged by the join
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.labels[:level + 1])
                join_index = left[left_indexer]

        else:
            # remap the level's label codes through the join result
            left_lev_indexer = _ensure_int64(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
                                                  len(old_level))

            new_lev_labels = com.take_nd(rev_indexer, left.labels[level],
                                         allow_fill=False)

            new_labels = list(left.labels)
            new_labels[level] = new_lev_labels

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left))
                mask = new_lev_labels != -1
                if not mask.all():
                    new_labels = [lab[mask] for lab in new_labels]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    ngroups = 1 + new_lev_labels.max()
                    left_indexer, counts = groupsort_indexer(new_lev_labels,
                                                             ngroups)
                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0]:]
                    new_labels = [lab[left_indexer] for lab in new_labels]

                else:  # sort the leaves
                    mask = new_lev_labels != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_labels = [lab[mask] for lab in new_labels]

                    left_indexer = _get_leaf_sorter(new_labels[:level + 1])
                    new_labels = [lab[left_indexer] for lab in new_labels]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(levels=new_levels, labels=new_labels,
                                    names=left.names, verify_integrity=False)

        if right_lev_indexer is not None:
            right_indexer = com.take_nd(right_lev_indexer,
                                        join_index.labels[level],
                                        allow_fill=False)
        else:
            right_indexer = join_index.labels[level]

        if flip_order:
            # restore the caller's operand order for the indexer pair
            left_indexer, right_indexer = right_indexer, left_indexer

        if return_indexers:
            return join_index, left_indexer, right_indexer
        else:
            return join_index

    def _join_monotonic(self, other, how='left', return_indexers=False):
        """
        Join two monotonic indexes using the fast sorted-merge indexers.
        Falls back on the identity when both sides are equal.
        """
        if self.equals(other):
            ret_index = other if how == 'right' else self
            if return_indexers:
                return ret_index, None, None
            else:
                return ret_index

        sv = self.values
        ov = other._values

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == 'left':
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(sv, ov)
            elif how == 'right':
                join_index = other
                lidx = self._left_indexer_unique(ov, sv)
                ridx = None
            elif how == 'inner':
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
            elif how == 'outer':
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
                join_index = self._wrap_joined_index(join_index, other)
        else:
            if how == 'left':
                join_index, lidx, ridx = self._left_indexer(sv, ov)
            elif how == 'right':
                # a right join is a left join with the operands swapped
                join_index, ridx, lidx = self._left_indexer(ov, sv)
            elif how == 'inner':
                join_index, lidx, ridx = self._inner_indexer(sv, ov)
            elif how == 'outer':
                join_index, lidx, ridx = self._outer_indexer(sv, ov)
            join_index = self._wrap_joined_index(join_index, other)

        if return_indexers:
            return join_index, lidx, ridx
        else:
            return join_index

    def _wrap_joined_index(self, joined, other):
        """ wrap joined values in an Index, keeping the name only if the
        two operands agreed on it """
        name = self.name if self.name == other.name else None
        return Index(joined, name=name)
slice_indexer(self, start=None, end=None, step=None, kind=None): + """ + For an ordered Index, compute the slice indexer for input labels and + step + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning + end : label, default None + If None, defaults to the end + step : int, default None + kind : string, default None + + Returns + ------- + indexer : ndarray or slice + + Notes + ----- + This function assumes that the data is sorted, so use at your own peril + """ + start_slice, end_slice = self.slice_locs(start, end, step=step, + kind=kind) + + # return a slice + if not lib.isscalar(start_slice): + raise AssertionError("Start slice bound is non-scalar") + if not lib.isscalar(end_slice): + raise AssertionError("End slice bound is non-scalar") + + return slice(start_slice, end_slice, step) + + def _maybe_cast_slice_bound(self, label, side, kind): + """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : string / None + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + + """ + + # We are a plain index here (sub-class override this method if they + # wish to have special treatment for floats/ints, e.g. 
Float64Index and + # datetimelike Indexes + # reject them + if is_float(label): + self._invalid_indexer('slice', label) + + # we are trying to find integer bounds on a non-integer based index + # this is rejected (generally .loc gets you here) + elif is_integer(label): + self._invalid_indexer('slice', label) + + return label + + def _searchsorted_monotonic(self, label, side='left'): + if self.is_monotonic_increasing: + return self.searchsorted(label, side=side) + elif self.is_monotonic_decreasing: + # np.searchsorted expects ascending sort order, have to reverse + # everything for it to work (element ordering, search side and + # resulting value). + pos = self[::-1].searchsorted(label, side='right' if side == 'left' + else 'right') + return len(self) - pos + + raise ValueError('index must be monotonic increasing or decreasing') + + def get_slice_bound(self, label, side, kind): + """ + Calculate slice bound that corresponds to given label. + + Returns leftmost (one-past-the-rightmost if ``side=='right'``) position + of given label. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : string / None, the type of indexer + + """ + if side not in ('left', 'right'): + raise ValueError("Invalid value for side kwarg," + " must be either 'left' or 'right': %s" % + (side, )) + + original_label = label + + # For datetime indices label may be a string that has to be converted + # to datetime boundary according to its resolution. + label = self._maybe_cast_slice_bound(label, side, kind) + + # we need to look up the label + try: + slc = self.get_loc(label) + except KeyError as err: + try: + return self._searchsorted_monotonic(label, side) + except ValueError: + # raise the original KeyError + raise err + + if isinstance(slc, np.ndarray): + # get_loc may return a boolean array or an array of indices, which + # is OK as long as they are representable by a slice. 
    def get_slice_bound(self, label, side, kind):
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : string / None, the type of indexer

        Returns
        -------
        int position of the bound

        Raises
        ------
        KeyError
            If the label is absent and no monotonic fallback is possible,
            or if it maps to a non-contiguous set of positions.
        """
        if side not in ('left', 'right'):
            raise ValueError("Invalid value for side kwarg,"
                             " must be either 'left' or 'right': %s" %
                             (side, ))

        # remember the raw label so error messages show what the user passed
        original_label = label

        # For datetime indices label may be a string that has to be converted
        # to datetime boundary according to its resolution.
        label = self._maybe_cast_slice_bound(label, side, kind)

        # we need to look up the label
        try:
            slc = self.get_loc(label)
        except KeyError as err:
            # label not present: on a monotonic index we can still return
            # the insertion point
            try:
                return self._searchsorted_monotonic(label, side)
            except ValueError:
                # raise the original KeyError
                raise err

        if isinstance(slc, np.ndarray):
            # get_loc may return a boolean array or an array of indices, which
            # is OK as long as they are representable by a slice.
            if is_bool_dtype(slc):
                slc = lib.maybe_booleans_to_slice(slc.view('u1'))
            else:
                slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self))
            if isinstance(slc, np.ndarray):
                # positions were not contiguous, so no single bound exists
                raise KeyError("Cannot get %s slice bound for non-unique "
                               "label: %r" % (side, original_label))

        if isinstance(slc, slice):
            if side == 'left':
                return slc.start
            else:
                return slc.stop
        else:
            # scalar position: the right bound is one past it
            if side == 'right':
                return slc + 1
            else:
                return slc
    def delete(self, loc):
        """
        Make new Index with passed location(-s) deleted

        Parameters
        ----------
        loc : int or array of ints
            Position(s) to remove; passed straight through to np.delete.

        Returns
        -------
        new_index : Index
        """
        return self._shallow_copy(np.delete(self._data, loc))

    def insert(self, loc, item):
        """
        Make new Index inserting new item at location. Follows
        Python list.append semantics for negative values

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        new_index : Index
        """
        _self = np.asarray(self)
        # wrap the scalar so concatenation works even when the item's
        # dtype differs from ours
        item = self._coerce_scalar_to_index(item)._values

        idx = np.concatenate((_self[:loc], item, _self[loc:]))
        # re-infer the dtype: inserting an item may change it
        return self._shallow_copy_with_infer(idx)

    def drop(self, labels, errors='raise'):
        """
        Make new Index with passed list of labels deleted

        Parameters
        ----------
        labels : array-like
        errors : {'ignore', 'raise'}, default 'raise'
            If 'ignore', suppress error and existing labels are dropped.

        Returns
        -------
        dropped : Index
        """
        labels = com._index_labels_to_array(labels)
        indexer = self.get_indexer(labels)
        mask = indexer == -1
        if mask.any():
            if errors != 'ignore':
                raise ValueError('labels %s not contained in axis' %
                                 labels[mask])
            # with errors='ignore', only drop the labels actually present
            indexer = indexer[~mask]
        return self.delete(indexer)

    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                   False: 'first'})
    @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
    def drop_duplicates(self, keep='first'):
        # thin wrapper: the shared implementation lives on the base class
        return super(Index, self).drop_duplicates(keep=keep)

    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                   False: 'first'})
    @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
    def duplicated(self, keep='first'):
        # thin wrapper: the shared implementation lives on the base class
        return super(Index, self).duplicated(keep=keep)
    @Appender(_index_shared_docs['fillna'])
    def fillna(self, value=None, downcast=None):
        # reject list-like fill values etc.
        self._assert_can_do_op(value)
        if self.hasnans:
            result = self.putmask(self._isnan, value)
            if downcast is None:
                # no need to care metadata other than name
                # because it can't have freq if it has NaNs
                # (NOTE(review): original comment was truncated here)
                return Index(result, name=self.name)
        # nothing to fill: return a shallow copy of ourselves
        return self._shallow_copy()

    def _evaluate_with_timedelta_like(self, other, op, opstr):
        # overridden by datetime-like subclasses; plain Index rejects this
        raise TypeError("can only perform ops with timedelta like values")

    def _evaluate_with_datetime_like(self, other, op, opstr):
        # overridden by datetime-like subclasses; plain Index rejects this
        raise TypeError("can only perform ops with datetime like values")

    @classmethod
    def _add_comparison_methods(cls):
        """ add in comparison methods """

        def _make_compare(op):
            def _evaluate_compare(self, other):
                if isinstance(other, (np.ndarray, Index, ABCSeries)):
                    if other.ndim > 0 and len(self) != len(other):
                        raise ValueError('Lengths must match to compare')
                # compare on the underlying values
                func = getattr(self.values, op)
                result = func(np.asarray(other))

                # technically we could support bool dtyped Index
                # for now just return the indexing array directly
                if is_bool_dtype(result):
                    return result
                try:
                    return Index(result)
                except TypeError:
                    return result

            return _evaluate_compare

        cls.__eq__ = _make_compare('__eq__')
        cls.__ne__ = _make_compare('__ne__')
        cls.__lt__ = _make_compare('__lt__')
        cls.__gt__ = _make_compare('__gt__')
        cls.__le__ = _make_compare('__le__')
        cls.__ge__ = _make_compare('__ge__')
perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + + invalid_op.__name__ = name + return invalid_op + + cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__') # noqa + cls.__sub__ = __isub__ = _make_invalid_op('__sub__') # noqa + + @classmethod + def _add_numeric_methods_disabled(cls): + """ add in numeric methods to disable """ + + def _make_invalid_op(name): + def invalid_op(self, other=None): + raise TypeError("cannot perform {name} with this index type: " + "{typ}".format(name=name, typ=type(self))) + + invalid_op.__name__ = name + return invalid_op + + cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') + cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') + cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') + cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__') + if not compat.PY3: + cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__') + cls.__neg__ = _make_invalid_op('__neg__') + cls.__pos__ = _make_invalid_op('__pos__') + cls.__abs__ = _make_invalid_op('__abs__') + cls.__inv__ = _make_invalid_op('__inv__') + + def _maybe_update_attributes(self, attrs): + """ Update Index attributes (e.g. freq) depending on op """ + return attrs + + def _validate_for_numeric_unaryop(self, op, opstr): + """ validate if we can perform a numeric unary operation """ + + if not self._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op " + "{opstr} for type: {typ}".format( + opstr=opstr, + typ=type(self)) + ) + + def _validate_for_numeric_binop(self, other, op, opstr): + """ + return valid other, evaluate or raise TypeError + if we are not of the appropriate type + + internal method called by ops + """ + from pandas.tseries.offsets import DateOffset + + # if we are an inheritor of numeric, + # but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) + if not self._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op {opstr} " + "for type: {typ}".format( + opstr=opstr, + typ=type(self)) + ) + + if isinstance(other, Index): + if not other._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op " + "{opstr} with type: {typ}".format( + opstr=type(self), + typ=type(other)) + ) + elif isinstance(other, np.ndarray) and not other.ndim: + other = other.item() + + if isinstance(other, (Index, ABCSeries, np.ndarray)): + if len(self) != len(other): + raise ValueError("cannot evaluate a numeric op with " + "unequal lengths") + other = _values_from_object(other) + if other.dtype.kind not in ['f', 'i']: + raise TypeError("cannot evaluate a numeric op " + "with a non-numeric dtype") + elif isinstance(other, (DateOffset, np.timedelta64, + Timedelta, datetime.timedelta)): + # higher up to handle + pass + elif isinstance(other, (Timestamp, np.datetime64)): + # higher up to handle + pass + else: + if not (is_float(other) or is_integer(other)): + raise TypeError("can only perform ops with scalar values") + + return other + + @classmethod + def _add_numeric_methods_binary(cls): + """ add in numeric methods """ + + def _make_evaluate_binop(op, opstr, reversed=False): + def _evaluate_numeric_binop(self, other): + + from pandas.tseries.offsets import DateOffset + other = self._validate_for_numeric_binop(other, op, opstr) + + # handle time-based others + if isinstance(other, (DateOffset, np.timedelta64, + Timedelta, datetime.timedelta)): + return self._evaluate_with_timedelta_like(other, op, opstr) + elif isinstance(other, (Timestamp, np.datetime64)): + return self._evaluate_with_datetime_like(other, op, opstr) + + # if we are a reversed non-communative op + values = self.values + if reversed: + values, other = other, values + + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + return Index(op(values, other), **attrs) + + return _evaluate_numeric_binop 
    @classmethod
    def _add_numeric_methods_unary(cls):
        """ add in numeric unary methods """

        def _make_evaluate_unary(op, opstr):

            def _evaluate_numeric_unary(self):
                # raises TypeError unless the index is numeric
                self._validate_for_numeric_unaryop(op, opstr)
                attrs = self._get_attributes_dict()
                attrs = self._maybe_update_attributes(attrs)
                return Index(op(self.values), **attrs)

            return _evaluate_numeric_unary

        cls.__neg__ = _make_evaluate_unary(lambda x: -x, '__neg__')
        cls.__pos__ = _make_evaluate_unary(lambda x: x, '__pos__')
        cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__')
        # NOTE(review): __inv__ is implemented here as arithmetic negation,
        # not bitwise inversion -- preserved as-is
        cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__')

    @classmethod
    def _add_numeric_methods(cls):
        """ add in both unary and binary numeric methods """
        cls._add_numeric_methods_unary()
        cls._add_numeric_methods_binary()

    @classmethod
    def _add_logical_methods(cls):
        """ add in logical methods (all / any reductions) """

        _doc = """

        %(desc)s

        Parameters
        ----------
        All arguments to numpy.%(outname)s are accepted.

        Returns
        -------
        %(outname)s : bool or array_like (if axis is specified)
            A single element array_like may be converted to bool."""

        def _make_logical_function(name, desc, f):
            @Substitution(outname=name, desc=desc)
            @Appender(_doc)
            def logical_func(self, *args, **kwargs):
                result = f(self.values)
                if (isinstance(result, (np.ndarray, ABCSeries, Index)) and
                        result.ndim == 0):
                    # return NumPy type
                    return result.dtype.type(result.item())
                else:  # pragma: no cover
                    return result

            logical_func.__name__ = name
            return logical_func

        cls.all = _make_logical_function('all', 'Return whether all elements '
                                                'are True',
                                         np.all)
        cls.any = _make_logical_function('any',
                                         'Return whether any element is True',
                                         np.any)

    @classmethod
    def _add_logical_methods_disabled(cls):
        """ add in logical methods to disable """

        def _make_invalid_op(name):
            def invalid_op(self, other=None):
                raise TypeError("cannot perform {name} with this index type: "
                                "{typ}".format(name=name, typ=type(self)))

            invalid_op.__name__ = name
            return invalid_op

        cls.all = _make_invalid_op('all')
        cls.any = _make_invalid_op('any')


# wire up the default Index behaviour: numeric ops disabled, logical and
# comparison ops enabled
Index._add_numeric_methods_disabled()
Index._add_logical_methods()
Index._add_comparison_methods()
+ converted, all_arrays = lib.clean_index_list(index_like) + + if len(converted) > 0 and all_arrays: + from .multi import MultiIndex + return MultiIndex.from_arrays(converted) + else: + index_like = converted + else: + # clean_index_list does the equivalent of copying + # so only need to do this if not list instance + if copy: + from copy import copy + index_like = copy(index_like) + + return Index(index_like) + + +def _get_na_value(dtype): + return {np.datetime64: tslib.NaT, + np.timedelta64: tslib.NaT}.get(dtype, np.nan) + + +def _ensure_frozen(array_like, categories, copy=False): + array_like = com._coerce_indexer_dtype(array_like, categories) + array_like = array_like.view(FrozenNDArray) + if copy: + array_like = array_like.copy() + return array_like + + +def _ensure_has_len(seq): + """If seq is an iterator, put its values into a list.""" + try: + len(seq) + except TypeError: + return list(seq) + else: + return seq + + +def _maybe_box(idx): + from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex + klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex + + if isinstance(idx, klasses): + return idx.asobject + + return idx + + +def _trim_front(strings): + """ + Trims zeros and decimal points + """ + trimmed = strings + while len(strings) > 0 and all([x[0] == ' ' for x in trimmed]): + trimmed = [x[1:] for x in trimmed] + return trimmed + + +def _validate_join_method(method): + if method not in ['left', 'right', 'inner', 'outer']: + raise ValueError('do not recognize join method %s' % method) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py new file mode 100644 index 0000000000000..4ead02e5bd022 --- /dev/null +++ b/pandas/indexes/category.py @@ -0,0 +1,598 @@ +import numpy as np +import pandas.lib as lib +import pandas.index as _index + +from pandas import compat +from pandas.util.decorators import (Appender, cache_readonly, + deprecate_kwarg) +from pandas.core.missing import _clean_reindex_fill_method +from pandas.core.config 
    def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
                copy=False, name=None, fastpath=False, **kwargs):
        # fastpath: `data` is already a ready-made Categorical; skip all
        # validation and coercion below
        # NOTE(review): `dtype` and extra **kwargs are accepted but not used
        # in this constructor -- confirm against callers
        if fastpath:
            return cls._simple_new(data, name=name)

        if isinstance(data, com.ABCCategorical):
            data = cls._create_categorical(cls, data, categories, ordered)
        elif isinstance(data, CategoricalIndex):
            # unwrap the underlying Categorical, then re-apply any
            # requested categories/ordered
            data = data._data
            data = cls._create_categorical(cls, data, categories, ordered)
        else:

            # don't allow scalars
            # if data is None, then categories must be provided
            if lib.isscalar(data):
                if data is not None or categories is None:
                    cls._scalar_data_error(data)
                data = []
            data = cls._create_categorical(cls, data, categories, ordered)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name)
------- + CategoricalIndex + """ + + from pandas.core.categorical import Categorical + if categories is None: + categories = self.categories + if ordered is None: + ordered = self.ordered + if name is None: + name = self.name + cat = Categorical.from_codes(codes, categories=categories, + ordered=self.ordered) + return CategoricalIndex(cat, name=name) + + @staticmethod + def _create_categorical(self, data, categories=None, ordered=None): + """ + *this is an internal non-public method* + + create the correct categorical from data and the properties + + Parameters + ---------- + data : data for new Categorical + categories : optional categories, defaults to existing + ordered : optional ordered attribute, defaults to existing + + Returns + ------- + Categorical + """ + if not isinstance(data, com.ABCCategorical): + from pandas.core.categorical import Categorical + data = Categorical(data, categories=categories, ordered=ordered) + else: + if categories is not None: + data = data.set_categories(categories) + if ordered is not None: + data = data.set_ordered(ordered) + return data + + @classmethod + def _simple_new(cls, values, name=None, categories=None, ordered=None, + **kwargs): + result = object.__new__(cls) + + values = cls._create_categorical(cls, values, categories, ordered) + result._data = values + result.name = name + for k, v in compat.iteritems(kwargs): + setattr(result, k, v) + + result._reset_identity() + return result + + def _is_dtype_compat(self, other): + """ + *this is an internal non-public method* + + provide a comparison between the dtype of self and other (coercing if + needed) + + Raises + ------ + TypeError if the dtypes are not compatible + """ + if com.is_categorical_dtype(other): + if isinstance(other, CategoricalIndex): + other = other._values + if not other.is_dtype_equal(self): + raise TypeError("categories must match existing categories " + "when appending") + else: + values = other + if not com.is_list_like(values): + values = [values] + 
    def equals(self, other):
        """
        Determines if two CategoricalIndex objects contain the same elements.
        """
        if self.is_(other):
            return True

        try:
            # _is_dtype_compat raises TypeError on incompatible categories
            other = self._is_dtype_compat(other)
            return com.array_equivalent(self._data, other)
        except (TypeError, ValueError):
            pass

        return False

    @property
    def _formatter_func(self):
        # delegate element formatting to the categories' own formatter
        return self.categories._formatter_func

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value)
        """
        # 0 means "unlimited" for display.max_categories; cap at 10 then
        max_categories = (10 if get_option("display.max_categories") == 0 else
                          get_option("display.max_categories"))
        attrs = [
            ('categories',
             ibase.default_pprint(self.categories,
                                  max_seq_items=max_categories)),
            ('ordered', self.ordered)]
        if self.name is not None:
            attrs.append(('name', ibase.default_pprint(self.name)))
        attrs.append(('dtype', "'%s'" % self.dtype))
        max_seq_items = get_option('display.max_seq_items') or len(self)
        if len(self) > max_seq_items:
            attrs.append(('length', len(self)))
        return attrs

    @property
    def inferred_type(self):
        # a CategoricalIndex always infers as 'categorical'
        return 'categorical'

    @property
    def values(self):
        """ return the underlying data, which is a Categorical """
        return self._data

    def get_values(self):
        """ return the underlying data as an ndarray """
        return self._data.get_values()

    @property
    def codes(self):
        # integer codes into `categories`; -1 marks a missing value
        return self._data.codes

    @property
    def categories(self):
        # the categories of the underlying Categorical
        return self._data.categories

    @property
    def ordered(self):
        # whether the underlying Categorical is ordered
        return self._data.ordered

    def __contains__(self, key):
        # unhashable keys can never be contained; raise early like a dict
        hash(key)
        return key in self.values

    def __array__(self, dtype=None):
        """ the array interface, return my values """
        return np.array(self._data, dtype=dtype)
    @cache_readonly
    def _isnan(self):
        """ return if each value is nan"""
        # a code of -1 marks a missing value in a Categorical
        return self._data.codes == -1

    @Appender(ibase._index_shared_docs['fillna'])
    def fillna(self, value, downcast=None):
        self._assert_can_do_op(value)
        # delegate to the Categorical; re-wrap the result with our name
        return CategoricalIndex(self._data.fillna(value), name=self.name)

    def argsort(self, *args, **kwargs):
        # sort order of the underlying Categorical (category-order aware)
        return self.values.argsort(*args, **kwargs)

    @cache_readonly
    def _engine(self):

        # we are going to look things up with the codes themselves
        return self._engine_type(lambda: self.codes.astype('i8'), len(self))

    @cache_readonly
    def is_unique(self):
        # unique iff no entry is duplicated
        return not self.duplicated().any()

    @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                   False: 'first'})
    @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs)
    def duplicated(self, keep='first'):
        from pandas.hashtable import duplicated_int64
        # duplicates are detected on the integer codes, not the values
        return duplicated_int64(self.codes.astype('i8'), keep)

    def _to_safe_for_reshape(self):
        """ convert to object if we are a categorical """
        return self.astype('object')
    def get_loc(self, key, method=None):
        """
        Get integer location for requested label

        Parameters
        ----------
        key : label
        method : {None}
            * default: exact matches only.

        Returns
        -------
        loc : int if unique index, possibly slice or mask if not

        Raises
        ------
        KeyError
            If the key is not among the categories or not present.
        """
        # map the key to its category code first
        codes = self.categories.get_loc(key)
        if (codes == -1):
            raise KeyError(key)
        # then find every position carrying that code
        indexer, _ = self._engine.get_indexer_non_unique(np.array([codes]))
        if (indexer == -1).any():
            raise KeyError(key)

        return indexer

    def _can_reindex(self, indexer):
        """ always allow reindexing """
        pass
they don't + # coerce based on the actual values, only on the dtype) + # unless we had an initial Categorical to begin with + # in which case we are going to conform to the passed Categorical + new_target = np.asarray(new_target) + if com.is_categorical_dtype(target): + new_target = target._shallow_copy(new_target, name=self.name) + else: + new_target = Index(new_target, name=self.name) + + return new_target, indexer + + def _reindex_non_unique(self, target): + """ reindex from a non-unique; which CategoricalIndex's are almost + always + """ + new_target, indexer = self.reindex(target) + new_indexer = None + + check = indexer == -1 + if check.any(): + new_indexer = np.arange(len(self.take(indexer))) + new_indexer[check] = -1 + + cats = self.categories.get_indexer(target) + if not (cats == -1).any(): + # .reindex returns normal Index. Revert to CategoricalIndex if + # all targets are included in my categories + new_target = self._shallow_copy(new_target) + + return new_target, indexer, new_indexer + + def get_indexer(self, target, method=None, limit=None, tolerance=None): + """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. 
The mask determines whether labels are + found or not in the current index + + Parameters + ---------- + target : MultiIndex or Index (of tuples) + method : {'pad', 'ffill', 'backfill', 'bfill'} + pad / ffill: propagate LAST valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + + Notes + ----- + This is a low-level method and probably should be used at your own risk + + Examples + -------- + >>> indexer, mask = index.get_indexer(new_index) + >>> new_values = cur_values.take(indexer) + >>> new_values[-mask] = np.nan + + Returns + ------- + (indexer, mask) : (ndarray, ndarray) + """ + method = _clean_reindex_fill_method(method) + target = ibase._ensure_index(target) + + if isinstance(target, CategoricalIndex): + target = target.categories + + if method == 'pad' or method == 'backfill': + raise NotImplementedError("method='pad' and method='backfill' not " + "implemented yet for CategoricalIndex") + elif method == 'nearest': + raise NotImplementedError("method='nearest' not implemented yet " + 'for CategoricalIndex') + else: + + codes = self.categories.get_indexer(target) + indexer, _ = self._engine.get_indexer_non_unique(codes) + + return com._ensure_platform_int(indexer) + + def get_indexer_non_unique(self, target): + """ this is the same for a CategoricalIndex for get_indexer; the API + returns the missing values as well + """ + target = ibase._ensure_index(target) + + if isinstance(target, CategoricalIndex): + target = target.categories + + codes = self.categories.get_indexer(target) + return self._engine.get_indexer_non_unique(codes) + + def _convert_list_indexer(self, keyarr, kind=None): + """ + we are passed a list indexer. 
+ Return our indexer or raise if all of the values are not included in + the categories + """ + codes = self.categories.get_indexer(keyarr) + if (codes == -1).any(): + raise KeyError("a list-indexer must only include values that are " + "in the categories") + + return None + + def take(self, indexer, axis=0, allow_fill=True, fill_value=None): + """ + For internal compatibility with numpy arrays. + + # filling must always be None/nan here + # but is passed thru internally + assert isnull(fill_value) + + See also + -------- + numpy.ndarray.take + """ + + indexer = com._ensure_platform_int(indexer) + taken = self.codes.take(indexer) + return self._create_from_codes(taken) + + def delete(self, loc): + """ + Make new Index with passed location(-s) deleted + + Returns + ------- + new_index : Index + """ + return self._create_from_codes(np.delete(self.codes, loc)) + + def insert(self, loc, item): + """ + Make new Index inserting new item at location. Follows + Python list.append semantics for negative values + + Parameters + ---------- + loc : int + item : object + + Returns + ------- + new_index : Index + + Raises + ------ + ValueError if the item is not in the categories + + """ + code = self.categories.get_indexer([item]) + if (code == -1): + raise TypeError("cannot insert an item into a CategoricalIndex " + "that is not already an existing category") + + codes = self.codes + codes = np.concatenate((codes[:loc], code, codes[loc:])) + return self._create_from_codes(codes) + + def append(self, other): + """ + Append a collection of CategoricalIndex options together + + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + appended : Index + + Raises + ------ + ValueError if other is not in the categories + """ + to_concat, name = self._ensure_compat_append(other) + to_concat = [self._is_dtype_compat(c) for c in to_concat] + codes = np.concatenate([c.codes for c in to_concat]) + return self._create_from_codes(codes, name=name) + + 
@classmethod + def _add_comparison_methods(cls): + """ add in comparison methods """ + + def _make_compare(op): + def _evaluate_compare(self, other): + + # if we have a Categorical type, then must have the same + # categories + if isinstance(other, CategoricalIndex): + other = other._values + elif isinstance(other, Index): + other = self._create_categorical( + self, other._values, categories=self.categories, + ordered=self.ordered) + + if isinstance(other, (com.ABCCategorical, np.ndarray, + com.ABCSeries)): + if len(self.values) != len(other): + raise ValueError("Lengths must match to compare") + + if isinstance(other, com.ABCCategorical): + if not self.values.is_dtype_equal(other): + raise TypeError("categorical index comparisions must " + "have the same categories and ordered " + "attributes") + + return getattr(self.values, op)(other) + + return _evaluate_compare + + cls.__eq__ = _make_compare('__eq__') + cls.__ne__ = _make_compare('__ne__') + cls.__lt__ = _make_compare('__lt__') + cls.__gt__ = _make_compare('__gt__') + cls.__le__ = _make_compare('__le__') + cls.__ge__ = _make_compare('__ge__') + + def _delegate_method(self, name, *args, **kwargs): + """ method delegation to the ._values """ + method = getattr(self._values, name) + if 'inplace' in kwargs: + raise ValueError("cannot use inplace with CategoricalIndex") + res = method(*args, **kwargs) + if lib.isscalar(res): + return res + return CategoricalIndex(res, name=self.name) + + @classmethod + def _add_accessors(cls): + """ add in Categorical accessor methods """ + + from pandas.core.categorical import Categorical + CategoricalIndex._add_delegate_accessors( + delegate=Categorical, accessors=["rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", "as_unordered", + "min", "max"], + typ='method', overwrite=True) + + +CategoricalIndex._add_numericlike_set_methods_disabled() 
+CategoricalIndex._add_numeric_methods_disabled() +CategoricalIndex._add_logical_methods_disabled() +CategoricalIndex._add_comparison_methods() +CategoricalIndex._add_accessors() diff --git a/pandas/indexes/float.py b/pandas/indexes/float.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py new file mode 100644 index 0000000000000..2d0ad1925daa0 --- /dev/null +++ b/pandas/indexes/multi.py @@ -0,0 +1,2166 @@ +# pylint: disable=E1101,E1103,W0232 +import datetime +import warnings +from functools import partial +from sys import getsizeof + +import numpy as np +import pandas.lib as lib +import pandas.index as _index +from pandas.lib import Timestamp + +from pandas.compat import range, zip, lrange, lzip, map +from pandas import compat +from pandas.core.base import FrozenList +import pandas.core.base as base +from pandas.util.decorators import (Appender, cache_readonly, + deprecate, deprecate_kwarg) +import pandas.core.common as com +from pandas.core.missing import _clean_reindex_fill_method +from pandas.core.common import (isnull, array_equivalent, + is_object_dtype, + _values_from_object, + is_iterator, + _ensure_int64, is_bool_indexer, + is_list_like, is_null_slice) + +from pandas.core.config import get_option +from pandas.io.common import PerformanceWarning + +from pandas.indexes.base import (Index, _ensure_index, _ensure_frozen, + _get_na_value, InvalidIndexError) +import pandas.indexes.base as ibase + + +class MultiIndex(Index): + """ + A multi-level, or hierarchical, index object for pandas objects + + Parameters + ---------- + levels : sequence of arrays + The unique labels for each level + labels : sequence of arrays + Integers for each level designating which label at each location + sortorder : optional int + Level of sortedness (must be lexicographically sorted by that + level) + names : optional sequence of objects + Names for each of the index levels. 
(name is accepted for compat) + copy : boolean, default False + Copy the meta-data + verify_integrity : boolean, default True + Check that the levels/labels are consistent and valid + """ + + # initialize to zero-length tuples to make everything work + _typ = 'multiindex' + _names = FrozenList() + _levels = FrozenList() + _labels = FrozenList() + _comparables = ['names'] + rename = Index.set_names + + def __new__(cls, levels=None, labels=None, sortorder=None, names=None, + copy=False, verify_integrity=True, _set_identity=True, + name=None, **kwargs): + + # compat with Index + if name is not None: + names = name + if levels is None or labels is None: + raise TypeError("Must pass both levels and labels") + if len(levels) != len(labels): + raise ValueError('Length of levels and labels must be the same.') + if len(levels) == 0: + raise ValueError('Must pass non-zero number of levels/labels') + if len(levels) == 1: + if names: + name = names[0] + else: + name = None + return Index(levels[0], name=name, copy=True).take(labels[0]) + + result = object.__new__(MultiIndex) + + # we've already validated levels and labels, so shortcut here + result._set_levels(levels, copy=copy, validate=False) + result._set_labels(labels, copy=copy, validate=False) + + if names is not None: + # handles name validation + result._set_names(names) + + if sortorder is not None: + result.sortorder = int(sortorder) + else: + result.sortorder = sortorder + + if verify_integrity: + result._verify_integrity() + if _set_identity: + result._reset_identity() + + return result + + def _verify_integrity(self): + """Raises ValueError if length of levels and labels don't match or any + label would exceed level bounds""" + # NOTE: Currently does not check, among other things, that cached + # nlevels matches nor that sortorder matches actually sortorder. + labels, levels = self.labels, self.levels + if len(levels) != len(labels): + raise ValueError("Length of levels and labels must match. 
NOTE:" + " this index is in an inconsistent state.") + label_length = len(self.labels[0]) + for i, (level, label) in enumerate(zip(levels, labels)): + if len(label) != label_length: + raise ValueError("Unequal label lengths: %s" % + ([len(lab) for lab in labels])) + if len(label) and label.max() >= len(level): + raise ValueError("On level %d, label max (%d) >= length of" + " level (%d). NOTE: this index is in an" + " inconsistent state" % (i, label.max(), + len(level))) + + def _get_levels(self): + return self._levels + + def _set_levels(self, levels, level=None, copy=False, validate=True, + verify_integrity=False): + # This is NOT part of the levels property because it should be + # externally not allowed to set levels. User beware if you change + # _levels directly + if validate and len(levels) == 0: + raise ValueError('Must set non-zero number of levels.') + if validate and level is None and len(levels) != self.nlevels: + raise ValueError('Length of levels must match number of levels.') + if validate and level is not None and len(levels) != len(level): + raise ValueError('Length of levels must match length of level.') + + if level is None: + new_levels = FrozenList( + _ensure_index(lev, copy=copy)._shallow_copy() + for lev in levels) + else: + level = [self._get_level_number(l) for l in level] + new_levels = list(self._levels) + for l, v in zip(level, levels): + new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy() + new_levels = FrozenList(new_levels) + + names = self.names + self._levels = new_levels + if any(names): + self._set_names(names) + + self._tuples = None + self._reset_cache() + + if verify_integrity: + self._verify_integrity() + + def set_levels(self, levels, level=None, inplace=False, + verify_integrity=True): + """ + Set new levels on MultiIndex. Defaults to returning + new index. 
+ + Parameters + ---------- + levels : sequence or list of sequence + new level(s) to apply + level : int, level name, or sequence of int/level names (default None) + level(s) to set (None for all levels) + inplace : bool + if True, mutates in place + verify_integrity : bool (default True) + if True, checks that levels and labels are compatible + + Returns + ------- + new index (of same type and class...etc) + + + Examples + -------- + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')], + names=['foo', 'bar']) + >>> idx.set_levels([['a','b'], [1,2]]) + MultiIndex(levels=[[u'a', u'b'], [1, 2]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_levels(['a','b'], level=0) + MultiIndex(levels=[[u'a', u'b'], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_levels(['a','b'], level='bar') + MultiIndex(levels=[[1, 2], [u'a', u'b']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_levels([['a','b'], [1,2]], level=[0,1]) + MultiIndex(levels=[[u'a', u'b'], [1, 2]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + """ + if level is not None and not is_list_like(level): + if not is_list_like(levels): + raise TypeError("Levels must be list-like") + if is_list_like(levels[0]): + raise TypeError("Levels must be list-like") + level = [level] + levels = [levels] + elif level is None or is_list_like(level): + if not is_list_like(levels) or not is_list_like(levels[0]): + raise TypeError("Levels must be list of lists-like") + + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._reset_identity() + idx._set_levels(levels, level=level, validate=True, + verify_integrity=verify_integrity) + if not inplace: + return idx + + # remove me in 0.14 and change to read only property + __set_levels = deprecate("setting `levels` directly", + partial(set_levels, inplace=True, + verify_integrity=True), + 
alt_name="set_levels") + levels = property(fget=_get_levels, fset=__set_levels) + + def _get_labels(self): + return self._labels + + def _set_labels(self, labels, level=None, copy=False, validate=True, + verify_integrity=False): + + if validate and level is None and len(labels) != self.nlevels: + raise ValueError("Length of labels must match number of levels") + if validate and level is not None and len(labels) != len(level): + raise ValueError('Length of labels must match length of levels.') + + if level is None: + new_labels = FrozenList( + _ensure_frozen(lab, lev, copy=copy)._shallow_copy() + for lev, lab in zip(self.levels, labels)) + else: + level = [self._get_level_number(l) for l in level] + new_labels = list(self._labels) + for l, lev, lab in zip(level, self.levels, labels): + new_labels[l] = _ensure_frozen( + lab, lev, copy=copy)._shallow_copy() + new_labels = FrozenList(new_labels) + + self._labels = new_labels + self._tuples = None + self._reset_cache() + + if verify_integrity: + self._verify_integrity() + + def set_labels(self, labels, level=None, inplace=False, + verify_integrity=True): + """ + Set new labels on MultiIndex. Defaults to returning + new index. 
+ + Parameters + ---------- + labels : sequence or list of sequence + new labels to apply + level : int, level name, or sequence of int/level names (default None) + level(s) to set (None for all levels) + inplace : bool + if True, mutates in place + verify_integrity : bool (default True) + if True, checks that levels and labels are compatible + + Returns + ------- + new index (of same type and class...etc) + + Examples + -------- + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')], + names=['foo', 'bar']) + >>> idx.set_labels([[1,0,1,0], [0,0,1,1]]) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[1, 0, 1, 0], [0, 0, 1, 1]], + names=[u'foo', u'bar']) + >>> idx.set_labels([1,0,1,0], level=0) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[1, 0, 1, 0], [0, 1, 0, 1]], + names=[u'foo', u'bar']) + >>> idx.set_labels([0,0,1,1], level='bar') + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 1, 1], [0, 0, 1, 1]], + names=[u'foo', u'bar']) + >>> idx.set_labels([[1,0,1,0], [0,0,1,1]], level=[0,1]) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[1, 0, 1, 0], [0, 0, 1, 1]], + names=[u'foo', u'bar']) + """ + if level is not None and not is_list_like(level): + if not is_list_like(labels): + raise TypeError("Labels must be list-like") + if is_list_like(labels[0]): + raise TypeError("Labels must be list-like") + level = [level] + labels = [labels] + elif level is None or is_list_like(level): + if not is_list_like(labels) or not is_list_like(labels[0]): + raise TypeError("Labels must be list of lists-like") + + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._reset_identity() + idx._set_labels(labels, level=level, verify_integrity=verify_integrity) + if not inplace: + return idx + + # remove me in 0.14 and change to readonly property + __set_labels = deprecate("setting labels directly", + partial(set_labels, inplace=True, + verify_integrity=True), + alt_name="set_labels") + 
labels = property(fget=_get_labels, fset=__set_labels) + + def copy(self, names=None, dtype=None, levels=None, labels=None, + deep=False, _set_identity=False): + """ + Make a copy of this object. Names, dtype, levels and labels can be + passed and will be set on new copy. + + Parameters + ---------- + names : sequence, optional + dtype : numpy dtype or pandas type, optional + levels : sequence, optional + labels : sequence, optional + + Returns + ------- + copy : MultiIndex + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + This could be potentially expensive on large MultiIndex objects. + """ + if deep: + from copy import deepcopy + levels = levels if levels is not None else deepcopy(self.levels) + labels = labels if labels is not None else deepcopy(self.labels) + names = names if names is not None else deepcopy(self.names) + else: + levels = self.levels + labels = self.labels + names = self.names + return MultiIndex(levels=levels, labels=labels, names=names, + sortorder=self.sortorder, verify_integrity=False, + _set_identity=_set_identity) + + def __array__(self, dtype=None): + """ the array interface, return my values """ + return self.values + + def view(self, cls=None): + """ this is defined as a copy with the same identity """ + result = self.copy() + result._id = self._id + return result + + def _shallow_copy_with_infer(self, values=None, **kwargs): + return self._shallow_copy(values, **kwargs) + + def _shallow_copy(self, values=None, **kwargs): + if values is not None: + if 'name' in kwargs: + kwargs['names'] = kwargs.pop('name', None) + # discards freq + kwargs.pop('freq', None) + return MultiIndex.from_tuples(values, **kwargs) + return self.view() + + @cache_readonly + def dtype(self): + return np.dtype('O') + + @cache_readonly + def nbytes(self): + """ return the number of bytes in the underlying data """ + level_nbytes = sum((i.nbytes for i in 
self.levels)) + label_nbytes = sum((i.nbytes for i in self.labels)) + names_nbytes = sum((getsizeof(i) for i in self.names)) + return level_nbytes + label_nbytes + names_nbytes + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value) + """ + attrs = [ + ('levels', ibase.default_pprint(self._levels, + max_seq_items=False)), + ('labels', ibase.default_pprint(self._labels, + max_seq_items=False))] + if not all(name is None for name in self.names): + attrs.append(('names', ibase.default_pprint(self.names))) + if self.sortorder is not None: + attrs.append(('sortorder', ibase.default_pprint(self.sortorder))) + return attrs + + def _format_space(self): + return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) + + def _format_data(self): + # we are formatting thru the attributes + return None + + def __len__(self): + return len(self.labels[0]) + + def _get_names(self): + return FrozenList(level.name for level in self.levels) + + def _set_names(self, names, level=None, validate=True): + """ + sets names on levels. WARNING: mutates! + + Note that you generally want to set this *after* changing levels, so + that it only acts on copies + """ + + names = list(names) + + if validate and level is not None and len(names) != len(level): + raise ValueError('Length of names must match length of level.') + if validate and level is None and len(names) != self.nlevels: + raise ValueError('Length of names must match number of levels in ' + 'MultiIndex.') + + if level is None: + level = range(self.nlevels) + else: + level = [self._get_level_number(l) for l in level] + + # set the name + for l, name in zip(level, names): + self.levels[l].rename(name, inplace=True) + + names = property(fset=_set_names, fget=_get_names, + doc="Names of levels in MultiIndex") + + def _reference_duplicate_name(self, name): + """ + Returns True if the name refered to in self.names is duplicated. + """ + # count the times name equals an element in self.names. 
+ return sum(name == n for n in self.names) > 1 + + def _format_native_types(self, na_rep='nan', **kwargs): + new_levels = [] + new_labels = [] + + # go through the levels and format them + for level, label in zip(self.levels, self.labels): + level = level._format_native_types(na_rep=na_rep, **kwargs) + # add nan values, if there are any + mask = (label == -1) + if mask.any(): + nan_index = len(level) + level = np.append(level, na_rep) + label = label.values() + label[mask] = nan_index + new_levels.append(level) + new_labels.append(label) + + # reconstruct the multi-index + mi = MultiIndex(levels=new_levels, labels=new_labels, names=self.names, + sortorder=self.sortorder, verify_integrity=False) + + return mi.values + + @property + def _constructor(self): + return MultiIndex.from_tuples + + @cache_readonly + def inferred_type(self): + return 'mixed' + + @staticmethod + def _from_elements(values, labels=None, levels=None, names=None, + sortorder=None): + return MultiIndex(levels, labels, names, sortorder=sortorder) + + def _get_level_number(self, level): + try: + count = self.names.count(level) + if count > 1: + raise ValueError('The name %s occurs multiple times, use a ' + 'level number' % level) + level = self.names.index(level) + except ValueError: + if not isinstance(level, int): + raise KeyError('Level %s not found' % str(level)) + elif level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError('Too many levels: Index has only %d ' + 'levels, %d is not a valid level number' % + (self.nlevels, orig_level)) + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError('Too many levels: Index has only %d levels, ' + 'not %d' % (self.nlevels, level + 1)) + return level + + _tuples = None + + @property + def values(self): + if self._tuples is not None: + return self._tuples + + values = [] + for lev, lab in zip(self.levels, self.labels): + # Need to box timestamps, etc. 
+ box = hasattr(lev, '_box_values') + # Try to minimize boxing. + if box and len(lev) > len(lab): + taken = lev._box_values(com.take_1d(lev._values, lab)) + elif box: + taken = com.take_1d(lev._box_values(lev._values), lab, + fill_value=_get_na_value(lev.dtype.type)) + else: + taken = com.take_1d(np.asarray(lev._values), lab) + values.append(taken) + + self._tuples = lib.fast_zip(values) + return self._tuples + + # fml + @property + def _is_v1(self): + return False + + @property + def _is_v2(self): + return False + + @property + def _has_complex_internals(self): + # to disable groupby tricks + return True + + @cache_readonly + def is_unique(self): + return not self.duplicated().any() + + @deprecate_kwarg('take_last', 'keep', mapping={True: 'last', + False: 'first'}) + @Appender(base._shared_docs['duplicated'] % ibase._index_doc_kwargs) + def duplicated(self, keep='first'): + from pandas.core.groupby import get_group_index + from pandas.hashtable import duplicated_int64 + + shape = map(len, self.levels) + ids = get_group_index(self.labels, shape, sort=False, xnull=False) + + return duplicated_int64(ids, keep) + + @Appender(ibase._index_shared_docs['fillna']) + def fillna(self, value=None, downcast=None): + # isnull is not implemented for MultiIndex + raise NotImplementedError('isnull is not defined for MultiIndex') + + def get_value(self, series, key): + # somewhat broken encapsulation + from pandas.core.indexing import maybe_droplevels + from pandas.core.series import Series + + # Label-based + s = _values_from_object(series) + k = _values_from_object(key) + + def _try_mi(k): + # TODO: what if a level contains tuples?? 
+ loc = self.get_loc(k) + new_values = series._values[loc] + new_index = self[loc] + new_index = maybe_droplevels(new_index, k) + return Series(new_values, index=new_index, name=series.name) + + try: + return self._engine.get_value(s, k) + except KeyError as e1: + try: + return _try_mi(key) + except KeyError: + pass + + try: + return _index.get_value_at(s, k) + except IndexError: + raise + except TypeError: + # generator/iterator-like + if is_iterator(key): + raise InvalidIndexError(key) + else: + raise e1 + except Exception: # pragma: no cover + raise e1 + except TypeError: + + # a Timestamp will raise a TypeError in a multi-index + # rather than a KeyError, try it here + # note that a string that 'looks' like a Timestamp will raise + # a KeyError! (GH5725) + if (isinstance(key, (datetime.datetime, np.datetime64)) or + (compat.PY3 and isinstance(key, compat.string_types))): + try: + return _try_mi(key) + except (KeyError): + raise + except: + pass + + try: + return _try_mi(Timestamp(key)) + except: + pass + + raise InvalidIndexError(key) + + def get_level_values(self, level): + """ + Return vector of label values for requested level, equal to the length + of the index + + Parameters + ---------- + level : int or level name + + Returns + ------- + values : ndarray + """ + num = self._get_level_number(level) + unique = self.levels[num] # .values + labels = self.labels[num] + filled = com.take_1d(unique.values, labels, + fill_value=unique._na_value) + _simple_new = unique._simple_new + values = _simple_new(filled, self.names[num], + freq=getattr(unique, 'freq', None), + tz=getattr(unique, 'tz', None)) + return values + + def format(self, space=2, sparsify=None, adjoin=True, names=False, + na_rep=None, formatter=None): + if len(self) == 0: + return [] + + stringified_levels = [] + for lev, lab in zip(self.levels, self.labels): + na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) + + if len(lev) > 0: + + formatted = 
lev.take(lab).format(formatter=formatter) + + # we have some NA + mask = lab == -1 + if mask.any(): + formatted = np.array(formatted, dtype=object) + formatted[mask] = na + formatted = formatted.tolist() + + else: + # weird all NA case + formatted = [com.pprint_thing(na if isnull(x) else x, + escape_chars=('\t', '\r', '\n')) + for x in com.take_1d(lev._values, lab)] + stringified_levels.append(formatted) + + result_levels = [] + for lev, name in zip(stringified_levels, self.names): + level = [] + + if names: + level.append(com.pprint_thing(name, + escape_chars=('\t', '\r', '\n')) + if name is not None else '') + + level.extend(np.array(lev, dtype=object)) + result_levels.append(level) + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + if sparsify: + sentinel = '' + # GH3547 + # use value of sparsify as sentinel, unless it's an obvious + # "Truthey" value + if sparsify not in [True, 1]: + sentinel = sparsify + # little bit of a kludge job for #1217 + result_levels = _sparsify(result_levels, start=int(names), + sentinel=sentinel) + + if adjoin: + from pandas.core.format import _get_adjustment + adj = _get_adjustment() + return adj.adjoin(space, *result_levels).split('\n') + else: + return result_levels + + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) + + def to_hierarchical(self, n_repeat, n_shuffle=1): + """ + Return a MultiIndex reshaped to conform to the + shapes given by n_repeat and n_shuffle. + + Useful to replicate and rearrange a MultiIndex for combination + with another Index with n_repeat items. + + Parameters + ---------- + n_repeat : int + Number of times to repeat the labels on self + n_shuffle : int + Controls the reordering of the labels. If the result is going + to be an inner level in a MultiIndex, n_shuffle will need to be + greater than one. The size of each label must divisible by + n_shuffle. 
+ + Returns + ------- + MultiIndex + + Examples + -------- + >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'), + (2, u'one'), (2, u'two')]) + >>> idx.to_hierarchical(3) + MultiIndex(levels=[[1, 2], [u'one', u'two']], + labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + """ + levels = self.levels + labels = [np.repeat(x, n_repeat) for x in self.labels] + # Assumes that each label is divisible by n_shuffle + labels = [x.reshape(n_shuffle, -1).ravel(1) for x in labels] + names = self.names + return MultiIndex(levels=levels, labels=labels, names=names) + + @property + def is_all_dates(self): + return False + + def is_lexsorted(self): + """ + Return True if the labels are lexicographically sorted + """ + return self.lexsort_depth == self.nlevels + + def is_lexsorted_for_tuple(self, tup): + """ + Return True if we are correctly lexsorted given the passed tuple + """ + return len(tup) <= self.lexsort_depth + + @cache_readonly + def lexsort_depth(self): + if self.sortorder is not None: + if self.sortorder == 0: + return self.nlevels + else: + return 0 + + int64_labels = [com._ensure_int64(lab) for lab in self.labels] + for k in range(self.nlevels, 0, -1): + if lib.is_lexsorted(int64_labels[:k]): + return k + + return 0 + + @classmethod + def from_arrays(cls, arrays, sortorder=None, names=None): + """ + Convert arrays to MultiIndex + + Parameters + ---------- + arrays : list / sequence of array-likes + Each array-like gives one level's value for each data point. + len(arrays) is the number of levels. 
+ sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level) + + Returns + ------- + index : MultiIndex + + Examples + -------- + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> MultiIndex.from_arrays(arrays, names=('number', 'color')) + + See Also + -------- + MultiIndex.from_tuples : Convert list of tuples to MultiIndex + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables + """ + from pandas.core.categorical import Categorical + + if len(arrays) == 1: + name = None if names is None else names[0] + return Index(arrays[0], name=name) + + cats = [Categorical.from_array(arr, ordered=True) for arr in arrays] + levels = [c.categories for c in cats] + labels = [c.codes for c in cats] + if names is None: + names = [getattr(arr, "name", None) for arr in arrays] + + return MultiIndex(levels=levels, labels=labels, sortorder=sortorder, + names=names, verify_integrity=False) + + @classmethod + def from_tuples(cls, tuples, sortorder=None, names=None): + """ + Convert list of tuples to MultiIndex + + Parameters + ---------- + tuples : list / sequence of tuple-likes + Each tuple is the index of one row/column. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level) + + Returns + ------- + index : MultiIndex + + Examples + -------- + >>> tuples = [(1, u'red'), (1, u'blue'), + (2, u'red'), (2, u'blue')] + >>> MultiIndex.from_tuples(tuples, names=('number', 'color')) + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables + """ + if len(tuples) == 0: + # I think this is right? Not quite sure... 
+ raise TypeError('Cannot infer number of levels from empty list') + + if isinstance(tuples, (np.ndarray, Index)): + if isinstance(tuples, Index): + tuples = tuples._values + + arrays = list(lib.tuples_to_object_array(tuples).T) + elif isinstance(tuples, list): + arrays = list(lib.to_object_array_tuples(tuples).T) + else: + arrays = lzip(*tuples) + + return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) + + @classmethod + def from_product(cls, iterables, sortorder=None, names=None): + """ + Make a MultiIndex from the cartesian product of multiple iterables + + Parameters + ---------- + iterables : list / sequence of iterables + Each iterable has unique labels for each level of the index. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of strings or None + Names for the levels in the index. + + Returns + ------- + index : MultiIndex + + Examples + -------- + >>> numbers = [0, 1, 2] + >>> colors = [u'green', u'purple'] + >>> MultiIndex.from_product([numbers, colors], + names=['number', 'color']) + MultiIndex(levels=[[0, 1, 2], [u'green', u'purple']], + labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=[u'number', u'color']) + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex + MultiIndex.from_tuples : Convert list of tuples to MultiIndex + """ + from pandas.core.categorical import Categorical + from pandas.tools.util import cartesian_product + + categoricals = [Categorical.from_array(it, ordered=True) + for it in iterables] + labels = cartesian_product([c.codes for c in categoricals]) + + return MultiIndex(levels=[c.categories for c in categoricals], + labels=labels, sortorder=sortorder, names=names) + + @property + def nlevels(self): + return len(self.levels) + + @property + def levshape(self): + return tuple(len(x) for x in self.levels) + + def __contains__(self, key): + hash(key) + # work around some kind of odd cython bug + 
try: + self.get_loc(key) + return True + except LookupError: + return False + + def __reduce__(self): + """Necessary for making this object picklable""" + d = dict(levels=[lev for lev in self.levels], + labels=[label for label in self.labels], + sortorder=self.sortorder, names=list(self.names)) + return ibase._new_Index, (self.__class__, d), None + + def __setstate__(self, state): + """Necessary for making this object picklable""" + + if isinstance(state, dict): + levels = state.get('levels') + labels = state.get('labels') + sortorder = state.get('sortorder') + names = state.get('names') + + elif isinstance(state, tuple): + + nd_state, own_state = state + levels, labels, sortorder, names = own_state + + self._set_levels([Index(x) for x in levels], validate=False) + self._set_labels(labels) + self._set_names(names) + self.sortorder = sortorder + self._verify_integrity() + self._reset_identity() + + def __getitem__(self, key): + if np.isscalar(key): + retval = [] + for lev, lab in zip(self.levels, self.labels): + if lab[key] == -1: + retval.append(np.nan) + else: + retval.append(lev[lab[key]]) + + return tuple(retval) + else: + if is_bool_indexer(key): + key = np.asarray(key) + sortorder = self.sortorder + else: + # cannot be sure whether the result will be sorted + sortorder = None + + new_labels = [lab[key] for lab in self.labels] + + return MultiIndex(levels=self.levels, labels=new_labels, + names=self.names, sortorder=sortorder, + verify_integrity=False) + + def take(self, indexer, axis=None): + indexer = com._ensure_platform_int(indexer) + new_labels = [lab.take(indexer) for lab in self.labels] + return MultiIndex(levels=self.levels, labels=new_labels, + names=self.names, verify_integrity=False) + + def append(self, other): + """ + Append a collection of Index options together + + Parameters + ---------- + other : Index or list/tuple of indices + + Returns + ------- + appended : Index + """ + if not isinstance(other, (list, tuple)): + other = [other] + + if 
all((isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) + for o in other): + arrays = [] + for i in range(self.nlevels): + label = self.get_level_values(i) + appended = [o.get_level_values(i) for o in other] + arrays.append(label.append(appended)) + return MultiIndex.from_arrays(arrays, names=self.names) + + to_concat = (self.values, ) + tuple(k._values for k in other) + new_tuples = np.concatenate(to_concat) + + # if all(isinstance(x, MultiIndex) for x in other): + try: + return MultiIndex.from_tuples(new_tuples, names=self.names) + except: + return Index(new_tuples) + + def argsort(self, *args, **kwargs): + return self.values.argsort(*args, **kwargs) + + def repeat(self, n): + return MultiIndex(levels=self.levels, + labels=[label.view(np.ndarray).repeat(n) + for label in self.labels], names=self.names, + sortorder=self.sortorder, verify_integrity=False) + + def drop(self, labels, level=None, errors='raise'): + """ + Make new MultiIndex with passed list of labels deleted + + Parameters + ---------- + labels : array-like + Must be a list of tuples + level : int or level name, default None + + Returns + ------- + dropped : MultiIndex + """ + if level is not None: + return self._drop_from_level(labels, level) + + try: + if not isinstance(labels, (np.ndarray, Index)): + labels = com._index_labels_to_array(labels) + indexer = self.get_indexer(labels) + mask = indexer == -1 + if mask.any(): + if errors != 'ignore': + raise ValueError('labels %s not contained in axis' % + labels[mask]) + indexer = indexer[~mask] + except Exception: + pass + + inds = [] + for label in labels: + try: + loc = self.get_loc(label) + if isinstance(loc, int): + inds.append(loc) + else: + inds.extend(lrange(loc.start, loc.stop)) + except KeyError: + if errors != 'ignore': + raise + + return self.delete(inds) + + def _drop_from_level(self, labels, level): + labels = com._index_labels_to_array(labels) + i = self._get_level_number(level) + index = self.levels[i] + values = 
index.get_indexer(labels) + + mask = ~lib.ismember(self.labels[i], set(values)) + + return self[mask] + + def droplevel(self, level=0): + """ + Return Index with requested level removed. If MultiIndex has only 2 + levels, the result will be of Index type not MultiIndex. + + Parameters + ---------- + level : int/level name or list thereof + + Notes + ----- + Does not check if result index is unique or not + + Returns + ------- + index : Index or MultiIndex + """ + levels = level + if not isinstance(levels, (tuple, list)): + levels = [level] + + new_levels = list(self.levels) + new_labels = list(self.labels) + new_names = list(self.names) + + levnums = sorted(self._get_level_number(lev) for lev in levels)[::-1] + + for i in levnums: + new_levels.pop(i) + new_labels.pop(i) + new_names.pop(i) + + if len(new_levels) == 1: + + # set nan if needed + mask = new_labels[0] == -1 + result = new_levels[0].take(new_labels[0]) + if mask.any(): + result = result.putmask(mask, np.nan) + + result.name = new_names[0] + return result + else: + return MultiIndex(levels=new_levels, labels=new_labels, + names=new_names, verify_integrity=False) + + def swaplevel(self, i, j): + """ + Swap level i with level j. Do not change the ordering of anything + + Parameters + ---------- + i, j : int, string (can be mixed) + Level of index to be swapped. Can pass level name as string. + + Returns + ------- + swapped : MultiIndex + """ + new_levels = list(self.levels) + new_labels = list(self.labels) + new_names = list(self.names) + + i = self._get_level_number(i) + j = self._get_level_number(j) + + new_levels[i], new_levels[j] = new_levels[j], new_levels[i] + new_labels[i], new_labels[j] = new_labels[j], new_labels[i] + new_names[i], new_names[j] = new_names[j], new_names[i] + + return MultiIndex(levels=new_levels, labels=new_labels, + names=new_names, verify_integrity=False) + + def reorder_levels(self, order): + """ + Rearrange levels using input order. 
May not drop or duplicate levels + + Parameters + ---------- + """ + order = [self._get_level_number(i) for i in order] + if len(order) != self.nlevels: + raise AssertionError('Length of order must be same as ' + 'number of levels (%d), got %d' % + (self.nlevels, len(order))) + new_levels = [self.levels[i] for i in order] + new_labels = [self.labels[i] for i in order] + new_names = [self.names[i] for i in order] + + return MultiIndex(levels=new_levels, labels=new_labels, + names=new_names, verify_integrity=False) + + def __getslice__(self, i, j): + return self.__getitem__(slice(i, j)) + + def sortlevel(self, level=0, ascending=True, sort_remaining=True): + """ + Sort MultiIndex at the requested level. The result will respect the + original ordering of the associated factor at that level. + + Parameters + ---------- + level : list-like, int or str, default 0 + If a string is given, must be a name of the level + If list-like must be names or ints of levels. + ascending : boolean, default True + False to sort in descending order + Can also be a list to specify a directed ordering + sort_remaining : sort by the remaining levels after level. 
+ + Returns + ------- + sorted_index : MultiIndex + """ + from pandas.core.groupby import _indexer_from_factorized + + if isinstance(level, (compat.string_types, int)): + level = [level] + level = [self._get_level_number(lev) for lev in level] + sortorder = None + + # we have a directed ordering via ascending + if isinstance(ascending, list): + if not len(level) == len(ascending): + raise ValueError("level must have same length as ascending") + + from pandas.core.groupby import _lexsort_indexer + indexer = _lexsort_indexer(self.labels, orders=ascending) + + # level ordering + else: + + labels = list(self.labels) + shape = list(self.levshape) + + # partition labels and shape + primary = tuple(labels.pop(lev - i) for i, lev in enumerate(level)) + primshp = tuple(shape.pop(lev - i) for i, lev in enumerate(level)) + + if sort_remaining: + primary += primary + tuple(labels) + primshp += primshp + tuple(shape) + else: + sortorder = level[0] + + indexer = _indexer_from_factorized(primary, primshp, + compress=False) + + if not ascending: + indexer = indexer[::-1] + + indexer = com._ensure_platform_int(indexer) + new_labels = [lab.take(indexer) for lab in self.labels] + + new_index = MultiIndex(labels=new_labels, levels=self.levels, + names=self.names, sortorder=sortorder, + verify_integrity=False) + + return new_index, indexer + + def get_indexer(self, target, method=None, limit=None, tolerance=None): + """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. 
The mask determines whether labels are + found or not in the current index + + Parameters + ---------- + target : MultiIndex or Index (of tuples) + method : {'pad', 'ffill', 'backfill', 'bfill'} + pad / ffill: propagate LAST valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + + Notes + ----- + This is a low-level method and probably should be used at your own risk + + Examples + -------- + >>> indexer, mask = index.get_indexer(new_index) + >>> new_values = cur_values.take(indexer) + >>> new_values[-mask] = np.nan + + Returns + ------- + (indexer, mask) : (ndarray, ndarray) + """ + method = _clean_reindex_fill_method(method) + + target = _ensure_index(target) + + target_index = target + if isinstance(target, MultiIndex): + target_index = target._tuple_index + + if not is_object_dtype(target_index.dtype): + return np.ones(len(target_index)) * -1 + + if not self.is_unique: + raise Exception('Reindexing only valid with uniquely valued Index ' + 'objects') + + self_index = self._tuple_index + + if method == 'pad' or method == 'backfill': + if tolerance is not None: + raise NotImplementedError("tolerance not implemented yet " + 'for MultiIndex') + indexer = self_index._get_fill_indexer(target, method, limit) + elif method == 'nearest': + raise NotImplementedError("method='nearest' not implemented yet " + 'for MultiIndex; see GitHub issue 9365') + else: + indexer = self_index._engine.get_indexer(target._values) + + return com._ensure_platform_int(indexer) + + def reindex(self, target, method=None, level=None, limit=None, + tolerance=None): + """ + Create index with target's values (move/add/delete values as necessary) + + Returns + ------- + new_index : pd.MultiIndex + Resulting index + indexer : np.ndarray or None + Indices of output values in original index + + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). 
+ preserve_names = not hasattr(target, 'names') + + if level is not None: + if method is not None: + raise TypeError('Fill method not supported if level passed') + + # GH7774: preserve dtype/tz if target is empty and not an Index. + # target may be an iterator + target = ibase._ensure_has_len(target) + if len(target) == 0 and not isinstance(target, Index): + idx = self.levels[level] + attrs = idx._get_attributes_dict() + attrs.pop('freq', None) # don't preserve freq + target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype), + **attrs) + else: + target = _ensure_index(target) + target, indexer, _ = self._join_level(target, level, how='right', + return_indexers=True, + keep_order=False) + else: + if self.equals(target): + indexer = None + else: + if self.is_unique: + indexer = self.get_indexer(target, method=method, + limit=limit, + tolerance=tolerance) + else: + raise Exception("cannot handle a non-unique multi-index!") + + if not isinstance(target, MultiIndex): + if indexer is None: + target = self + elif (indexer >= 0).all(): + target = self.take(indexer) + else: + # hopefully? + target = MultiIndex.from_tuples(target) + + if (preserve_names and target.nlevels == self.nlevels and + target.names != self.names): + target = target.copy(deep=False) + target.names = self.names + + return target, indexer + + @cache_readonly + def _tuple_index(self): + """ + Convert MultiIndex to an Index of tuples + + Returns + ------- + index : Index + """ + return Index(self._values) + + def get_slice_bound(self, label, side, kind): + if not isinstance(label, tuple): + label = label, + return self._partial_tup_index(label, side=side) + + def slice_locs(self, start=None, end=None, step=None, kind=None): + """ + For an ordered MultiIndex, compute the slice locations for input + labels. They can be tuples representing partial levels, e.g. for a + MultiIndex with 3 levels, you can pass a single value (corresponding to + the first level), or a 1-, 2-, or 3-tuple. 
+ + Parameters + ---------- + start : label or tuple, default None + If None, defaults to the beginning + end : label or tuple + If None, defaults to the end + step : int or None + Slice step + kind : string, optional, defaults None + + Returns + ------- + (start, end) : (int, int) + + Notes + ----- + This function assumes that the data is sorted by the first level + """ + # This function adds nothing to its parent implementation (the magic + # happens in get_slice_bound method), but it adds meaningful doc. + return super(MultiIndex, self).slice_locs(start, end, step, kind=kind) + + def _partial_tup_index(self, tup, side='left'): + if len(tup) > self.lexsort_depth: + raise KeyError('Key length (%d) was greater than MultiIndex' + ' lexsort depth (%d)' % + (len(tup), self.lexsort_depth)) + + n = len(tup) + start, end = 0, len(self) + zipped = zip(tup, self.levels, self.labels) + for k, (lab, lev, labs) in enumerate(zipped): + section = labs[start:end] + + if lab not in lev: + if not lev.is_type_compatible(lib.infer_dtype([lab])): + raise TypeError('Level type mismatch: %s' % lab) + + # short circuit + loc = lev.searchsorted(lab, side=side) + if side == 'right' and loc >= 0: + loc -= 1 + return start + section.searchsorted(loc, side=side) + + idx = lev.get_loc(lab) + if k < n - 1: + end = start + section.searchsorted(idx, side='right') + start = start + section.searchsorted(idx, side='left') + else: + return start + section.searchsorted(idx, side=side) + + def get_loc(self, key, method=None): + """ + Get integer location, slice or boolean mask for requested label or + tuple. If the key is past the lexsort depth, the return may be a + boolean mask array, otherwise it is always a slice or int. 
+ + Parameters + ---------- + key : label or tuple + method : None + + Returns + ------- + loc : int, slice object or boolean mask + """ + if method is not None: + raise NotImplementedError('only the default get_loc method is ' + 'currently supported for MultiIndex') + + def _maybe_to_slice(loc): + '''convert integer indexer to boolean mask or slice if possible''' + if not isinstance(loc, np.ndarray) or loc.dtype != 'int64': + return loc + + loc = lib.maybe_indices_to_slice(loc, len(self)) + if isinstance(loc, slice): + return loc + + mask = np.empty(len(self), dtype='bool') + mask.fill(False) + mask[loc] = True + return mask + + if not isinstance(key, tuple): + loc = self._get_level_indexer(key, level=0) + return _maybe_to_slice(loc) + + keylen = len(key) + if self.nlevels < keylen: + raise KeyError('Key length ({0}) exceeds index depth ({1})' + ''.format(keylen, self.nlevels)) + + if keylen == self.nlevels and self.is_unique: + + def _maybe_str_to_time_stamp(key, lev): + if lev.is_all_dates and not isinstance(key, Timestamp): + try: + return Timestamp(key, tz=getattr(lev, 'tz', None)) + except Exception: + pass + return key + + key = _values_from_object(key) + key = tuple(map(_maybe_str_to_time_stamp, key, self.levels)) + return self._engine.get_loc(key) + + # -- partial selection or non-unique index + # break the key into 2 parts based on the lexsort_depth of the index; + # the first part returns a continuous slice of the index; the 2nd part + # needs linear search within the slice + i = self.lexsort_depth + lead_key, follow_key = key[:i], key[i:] + start, stop = (self.slice_locs(lead_key, lead_key) + if lead_key else (0, len(self))) + + if start == stop: + raise KeyError(key) + + if not follow_key: + return slice(start, stop) + + warnings.warn('indexing past lexsort depth may impact performance.', + PerformanceWarning, stacklevel=10) + + loc = np.arange(start, stop, dtype='int64') + + for i, k in enumerate(follow_key, len(lead_key)): + mask = 
self.labels[i][loc] == self.levels[i].get_loc(k) + if not mask.all(): + loc = loc[mask] + if not len(loc): + raise KeyError(key) + + return (_maybe_to_slice(loc) if len(loc) != stop - start else + slice(start, stop)) + + def get_loc_level(self, key, level=0, drop_level=True): + """ + Get integer location slice for requested label or tuple + + Parameters + ---------- + key : label or tuple + level : int/level name or list thereof + + Returns + ------- + loc : int or slice object + """ + + def maybe_droplevels(indexer, levels, drop_level): + if not drop_level: + return self[indexer] + # kludgearound + orig_index = new_index = self[indexer] + levels = [self._get_level_number(i) for i in levels] + for i in sorted(levels, reverse=True): + try: + new_index = new_index.droplevel(i) + except: + + # no dropping here + return orig_index + return new_index + + if isinstance(level, (tuple, list)): + if len(key) != len(level): + raise AssertionError('Key for location must have same ' + 'length as number of levels') + result = None + for lev, k in zip(level, key): + loc, new_index = self.get_loc_level(k, level=lev) + if isinstance(loc, slice): + mask = np.zeros(len(self), dtype=bool) + mask[loc] = True + loc = mask + + result = loc if result is None else result & loc + + return result, maybe_droplevels(result, level, drop_level) + + level = self._get_level_number(level) + + # kludge for #1796 + if isinstance(key, list): + key = tuple(key) + + if isinstance(key, tuple) and level == 0: + + try: + if key in self.levels[0]: + indexer = self._get_level_indexer(key, level=level) + new_index = maybe_droplevels(indexer, [0], drop_level) + return indexer, new_index + except TypeError: + pass + + if not any(isinstance(k, slice) for k in key): + + # partial selection + # optionally get indexer to avoid re-calculation + def partial_selection(key, indexer=None): + if indexer is None: + indexer = self.get_loc(key) + ilevels = [i for i in range(len(key)) + if key[i] != slice(None, None)] + 
return indexer, maybe_droplevels(indexer, ilevels, + drop_level) + + if len(key) == self.nlevels: + + if self.is_unique: + + # here we have a completely specified key, but are + # using some partial string matching here + # GH4758 + all_dates = [(l.is_all_dates and + not isinstance(k, compat.string_types)) + for k, l in zip(key, self.levels)] + can_index_exactly = any(all_dates) + if (any([l.is_all_dates + for k, l in zip(key, self.levels)]) and + not can_index_exactly): + indexer = self.get_loc(key) + + # we have a multiple selection here + if (not isinstance(indexer, slice) or + indexer.stop - indexer.start != 1): + return partial_selection(key, indexer) + + key = tuple(self[indexer].tolist()[0]) + + return (self._engine.get_loc(_values_from_object(key)), + None) + else: + return partial_selection(key) + else: + return partial_selection(key) + else: + indexer = None + for i, k in enumerate(key): + if not isinstance(k, slice): + k = self._get_level_indexer(k, level=i) + if isinstance(k, slice): + # everything + if k.start == 0 and k.stop == len(self): + k = slice(None, None) + else: + k_index = k + + if isinstance(k, slice): + if k == slice(None, None): + continue + else: + raise TypeError(key) + + if indexer is None: + indexer = k_index + else: # pragma: no cover + indexer &= k_index + if indexer is None: + indexer = slice(None, None) + ilevels = [i for i in range(len(key)) + if key[i] != slice(None, None)] + return indexer, maybe_droplevels(indexer, ilevels, drop_level) + else: + indexer = self._get_level_indexer(key, level=level) + return indexer, maybe_droplevels(indexer, [level], drop_level) + + def _get_level_indexer(self, key, level=0, indexer=None): + # return an indexer, boolean array or a slice showing where the key is + # in the totality of values + # if the indexer is provided, then use this + + level_index = self.levels[level] + labels = self.labels[level] + + def convert_indexer(start, stop, step, indexer=indexer, labels=labels): + # given the inputs 
and the labels/indexer, compute an indexer set + # if we have a provided indexer, then this need not consider + # the entire labels set + + r = np.arange(start, stop, step) + if indexer is not None and len(indexer) != len(labels): + + # we have an indexer which maps the locations in the labels + # that we have already selected (and is not an indexer for the + # entire set) otherwise this is wasteful so we only need to + # examine locations that are in this set the only magic here is + # that the result are the mappings to the set that we have + # selected + from pandas import Series + mapper = Series(indexer) + indexer = labels.take(com._ensure_platform_int(indexer)) + result = Series(Index(indexer).isin(r).nonzero()[0]) + m = result.map(mapper)._values + + else: + m = np.zeros(len(labels), dtype=bool) + m[np.in1d(labels, r, assume_unique=True)] = True + + return m + + if isinstance(key, slice): + # handle a slice, returnig a slice if we can + # otherwise a boolean indexer + + try: + if key.start is not None: + start = level_index.get_loc(key.start) + else: + start = 0 + if key.stop is not None: + stop = level_index.get_loc(key.stop) + else: + stop = len(level_index) - 1 + step = key.step + except KeyError: + + # we have a partial slice (like looking up a partial date + # string) + start = stop = level_index.slice_indexer(key.start, key.stop, + key.step) + step = start.step + + if isinstance(start, slice) or isinstance(stop, slice): + # we have a slice for start and/or stop + # a partial date slicer on a DatetimeIndex generates a slice + # note that the stop ALREADY includes the stopped point (if + # it was a string sliced) + return convert_indexer(start.start, stop.stop, step) + + elif level > 0 or self.lexsort_depth == 0 or step is not None: + # need to have like semantics here to right + # searching as when we are using a slice + # so include the stop+1 (so we include stop) + return convert_indexer(start, stop + 1, step) + else: + # sorted, so can return slice 
object -> view + i = labels.searchsorted(start, side='left') + j = labels.searchsorted(stop, side='right') + return slice(i, j, step) + + else: + + loc = level_index.get_loc(key) + if level > 0 or self.lexsort_depth == 0: + return np.array(labels == loc, dtype=bool) + else: + # sorted, so can return slice object -> view + i = labels.searchsorted(loc, side='left') + j = labels.searchsorted(loc, side='right') + return slice(i, j) + + def get_locs(self, tup): + """ + Given a tuple of slices/lists/labels/boolean indexer to a level-wise + spec produce an indexer to extract those locations + + Parameters + ---------- + key : tuple of (slices/list/labels) + + Returns + ------- + locs : integer list of locations or boolean indexer suitable + for passing to iloc + """ + + # must be lexsorted to at least as many levels + if not self.is_lexsorted_for_tuple(tup): + raise KeyError('MultiIndex Slicing requires the index to be fully ' + 'lexsorted tuple len ({0}), lexsort depth ' + '({1})'.format(len(tup), self.lexsort_depth)) + + # indexer + # this is the list of all values that we want to select + n = len(self) + indexer = None + + def _convert_to_indexer(r): + # return an indexer + if isinstance(r, slice): + m = np.zeros(n, dtype=bool) + m[r] = True + r = m.nonzero()[0] + elif is_bool_indexer(r): + if len(r) != n: + raise ValueError("cannot index with a boolean indexer " + "that is not the same length as the " + "index") + r = r.nonzero()[0] + from .numeric import Int64Index + return Int64Index(r) + + def _update_indexer(idxr, indexer=indexer): + if indexer is None: + indexer = Index(np.arange(n)) + if idxr is None: + return indexer + return indexer & idxr + + for i, k in enumerate(tup): + + if is_bool_indexer(k): + # a boolean indexer, must be the same length! 
+ k = np.asarray(k) + indexer = _update_indexer(_convert_to_indexer(k), + indexer=indexer) + + elif is_list_like(k): + # a collection of labels to include from this level (these + # are or'd) + indexers = None + for x in k: + try: + idxrs = _convert_to_indexer( + self._get_level_indexer(x, level=i, + indexer=indexer)) + indexers = (idxrs if indexers is None + else indexers | idxrs) + except KeyError: + + # ignore not founds + continue + + if indexers is not None: + indexer = _update_indexer(indexers, indexer=indexer) + else: + from .numeric import Int64Index + # no matches we are done + return Int64Index([])._values + + elif is_null_slice(k): + # empty slice + indexer = _update_indexer(None, indexer=indexer) + + elif isinstance(k, slice): + + # a slice, include BOTH of the labels + indexer = _update_indexer(_convert_to_indexer( + self._get_level_indexer(k, level=i, indexer=indexer)), + indexer=indexer) + else: + # a single label + indexer = _update_indexer(_convert_to_indexer( + self.get_loc_level(k, level=i, drop_level=False)[0]), + indexer=indexer) + + # empty indexer + if indexer is None: + return Int64Index([])._values + return indexer._values + + def truncate(self, before=None, after=None): + """ + Slice index between two labels / tuples, return new MultiIndex + + Parameters + ---------- + before : label or tuple, can be partial. Default None + None defaults to start + after : label or tuple, can be partial. 
Default None + None defaults to end + + Returns + ------- + truncated : MultiIndex + """ + if after and before and after < before: + raise ValueError('after < before') + + i, j = self.levels[0].slice_locs(before, after) + left, right = self.slice_locs(before, after) + + new_levels = list(self.levels) + new_levels[0] = new_levels[0][i:j] + + new_labels = [lab[left:right] for lab in self.labels] + new_labels[0] = new_labels[0] - i + + return MultiIndex(levels=new_levels, labels=new_labels, + verify_integrity=False) + + def equals(self, other): + """ + Determines if two MultiIndex objects have the same labeling information + (the levels themselves do not necessarily have to be the same) + + See also + -------- + equal_levels + """ + if self.is_(other): + return True + + if not isinstance(other, MultiIndex): + return array_equivalent(self._values, + _values_from_object(_ensure_index(other))) + + if self.nlevels != other.nlevels: + return False + + if len(self) != len(other): + return False + + for i in range(self.nlevels): + svalues = com.take_nd(np.asarray(self.levels[i]._values), + self.labels[i], allow_fill=False) + ovalues = com.take_nd(np.asarray(other.levels[i]._values), + other.labels[i], allow_fill=False) + if not array_equivalent(svalues, ovalues): + return False + + return True + + def equal_levels(self, other): + """ + Return True if the levels of both MultiIndex objects are the same + + """ + if self.nlevels != other.nlevels: + return False + + for i in range(self.nlevels): + if not self.levels[i].equals(other.levels[i]): + return False + return True + + def union(self, other): + """ + Form the union of two MultiIndex objects, sorting if possible + + Parameters + ---------- + other : MultiIndex or array / Index of tuples + + Returns + ------- + Index + + >>> index.union(index2) + """ + self._assert_can_do_setop(other) + other, result_names = self._convert_can_do_setop(other) + + if len(other) == 0 or self.equals(other): + return self + + uniq_tuples = 
lib.fast_unique_multiple([self._values, other._values]) + return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, + names=result_names) + + def intersection(self, other): + """ + Form the intersection of two MultiIndex objects, sorting if possible + + Parameters + ---------- + other : MultiIndex or array / Index of tuples + + Returns + ------- + Index + """ + self._assert_can_do_setop(other) + other, result_names = self._convert_can_do_setop(other) + + if self.equals(other): + return self + + self_tuples = self._values + other_tuples = other._values + uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) + if len(uniq_tuples) == 0: + return MultiIndex(levels=[[]] * self.nlevels, + labels=[[]] * self.nlevels, + names=result_names, verify_integrity=False) + else: + return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, + names=result_names) + + def difference(self, other): + """ + Compute sorted set difference of two MultiIndex objects + + Returns + ------- + diff : MultiIndex + """ + self._assert_can_do_setop(other) + other, result_names = self._convert_can_do_setop(other) + + if len(other) == 0: + return self + + if self.equals(other): + return MultiIndex(levels=[[]] * self.nlevels, + labels=[[]] * self.nlevels, + names=result_names, verify_integrity=False) + + difference = sorted(set(self._values) - set(other._values)) + + if len(difference) == 0: + return MultiIndex(levels=[[]] * self.nlevels, + labels=[[]] * self.nlevels, + names=result_names, verify_integrity=False) + else: + return MultiIndex.from_tuples(difference, sortorder=0, + names=result_names) + + def astype(self, dtype): + if not is_object_dtype(np.dtype(dtype)): + raise TypeError('Setting %s dtype to anything other than object ' + 'is not supported' % self.__class__) + return self._shallow_copy() + + def _convert_can_do_setop(self, other): + result_names = self.names + + if not hasattr(other, 'names'): + if len(other) == 0: + other = MultiIndex(levels=[[]] * self.nlevels, + 
labels=[[]] * self.nlevels, + verify_integrity=False) + else: + msg = 'other must be a MultiIndex or a list of tuples' + try: + other = MultiIndex.from_tuples(other) + except: + raise TypeError(msg) + else: + result_names = self.names if self.names == other.names else None + return other, result_names + + def insert(self, loc, item): + """ + Make new MultiIndex inserting new item at location + + Parameters + ---------- + loc : int + item : tuple + Must be same length as number of levels in the MultiIndex + + Returns + ------- + new_index : Index + """ + # Pad the key with empty strings if lower levels of the key + # aren't specified: + if not isinstance(item, tuple): + item = (item, ) + ('', ) * (self.nlevels - 1) + elif len(item) != self.nlevels: + raise ValueError('Item must have length equal to number of ' + 'levels.') + + new_levels = [] + new_labels = [] + for k, level, labels in zip(item, self.levels, self.labels): + if k not in level: + # have to insert into level + # must insert at end otherwise you have to recompute all the + # other labels + lev_loc = len(level) + level = level.insert(lev_loc, k) + else: + lev_loc = level.get_loc(k) + + new_levels.append(level) + new_labels.append(np.insert(_ensure_int64(labels), loc, lev_loc)) + + return MultiIndex(levels=new_levels, labels=new_labels, + names=self.names, verify_integrity=False) + + def delete(self, loc): + """ + Make new index with passed location deleted + + Returns + ------- + new_index : MultiIndex + """ + new_labels = [np.delete(lab, loc) for lab in self.labels] + return MultiIndex(levels=self.levels, labels=new_labels, + names=self.names, verify_integrity=False) + + get_major_bounds = slice_locs + + __bounds = None + + @property + def _bounds(self): + """ + Return or compute and return slice points for level 0, assuming + sortedness + """ + if self.__bounds is None: + inds = np.arange(len(self.levels[0])) + self.__bounds = self.labels[0].searchsorted(inds) + + return self.__bounds + + def 
_wrap_joined_index(self, joined, other): + names = self.names if self.names == other.names else None + return MultiIndex.from_tuples(joined, names=names) + + @Appender(Index.isin.__doc__) + def isin(self, values, level=None): + if level is None: + return lib.ismember(np.array(self), set(values)) + else: + num = self._get_level_number(level) + levs = self.levels[num] + labs = self.labels[num] + + sought_labels = levs.isin(values).nonzero()[0] + if levs.size == 0: + return np.zeros(len(labs), dtype=np.bool_) + else: + return np.lib.arraysetops.in1d(labs, sought_labels) + + +MultiIndex._add_numeric_methods_disabled() +MultiIndex._add_logical_methods_disabled() + + +def _sparsify(label_list, start=0, sentinel=''): + pivoted = lzip(*label_list) + k = len(label_list) + + result = pivoted[:start + 1] + prev = pivoted[start] + + for cur in pivoted[start + 1:]: + sparse_cur = [] + + for i, (p, t) in enumerate(zip(prev, cur)): + if i == k - 1: + sparse_cur.append(t) + result.append(sparse_cur) + break + + if p == t: + sparse_cur.append(sentinel) + else: + sparse_cur.extend(cur[i:]) + result.append(sparse_cur) + break + + prev = cur + + return lzip(*result) + + +def _get_na_rep(dtype): + return {np.datetime64: 'NaT', np.timedelta64: 'NaT'}.get(dtype, 'NaN') diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py new file mode 100644 index 0000000000000..61d93284adbbb --- /dev/null +++ b/pandas/indexes/numeric.py @@ -0,0 +1,369 @@ +import numpy as np +import pandas.lib as lib +import pandas.algos as _algos +import pandas.index as _index + +from pandas import compat +from pandas.indexes.base import Index, InvalidIndexError +from pandas.util.decorators import Appender, cache_readonly +import pandas.core.common as com +import pandas.indexes.base as ibase + + +class NumericIndex(Index): + """ + Provide numeric type operations + + This is an abstract class + + """ + _is_numeric_dtype = True + + def _maybe_cast_slice_bound(self, label, side, kind): + """ + This function 
should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : string / None + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + + """ + + # we are a numeric index, so we accept + # integer/floats directly + if not (com.is_integer(label) or com.is_float(label)): + self._invalid_indexer('slice', label) + + return label + + def _convert_tolerance(self, tolerance): + try: + return float(tolerance) + except ValueError: + raise ValueError('tolerance argument for %s must be numeric: %r' % + (type(self).__name__, tolerance)) + + +class Int64Index(NumericIndex): + """ + Immutable ndarray implementing an ordered, sliceable set. The basic object + storing axis labels for all pandas objects. Int64Index is a special case + of `Index` with purely integer labels. This is the default index type used + by the DataFrame and Series ctors when no explicit index is provided by the + user. 
+ + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: int64) + copy : bool + Make a copy of input ndarray + name : object + Name to be stored in the index + + Notes + ----- + An Index instance can **only** contain hashable objects + """ + + _typ = 'int64index' + _groupby = _algos.groupby_int64 + _arrmap = _algos.arrmap_int64 + _left_indexer_unique = _algos.left_join_indexer_unique_int64 + _left_indexer = _algos.left_join_indexer_int64 + _inner_indexer = _algos.inner_join_indexer_int64 + _outer_indexer = _algos.outer_join_indexer_int64 + + _can_hold_na = False + + _engine_type = _index.Int64Engine + + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False, **kwargs): + + if fastpath: + return cls._simple_new(data, name=name) + + # isscalar, generators handled in coerce_to_ndarray + data = cls._coerce_to_ndarray(data) + + if issubclass(data.dtype.type, compat.string_types): + cls._string_data_error(data) + + elif issubclass(data.dtype.type, np.integer): + # don't force the upcast as we may be dealing + # with a platform int + if (dtype is None or + not issubclass(np.dtype(dtype).type, np.integer)): + dtype = np.int64 + + subarr = np.array(data, dtype=dtype, copy=copy) + else: + subarr = np.array(data, dtype=np.int64, copy=copy) + if len(data) > 0: + if (subarr != data).any(): + raise TypeError('Unsafe NumPy casting to integer, you must' + ' explicitly cast') + + return cls._simple_new(subarr, name=name) + + @property + def inferred_type(self): + return 'integer' + + @property + def asi8(self): + # do not cache or you'll create a memory leak + return self.values.view('i8') + + @property + def is_all_dates(self): + """ + Checks that all the labels are datetime objects + """ + return False + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. 
+ """ + if self.is_(other): + return True + + try: + return com.array_equivalent(com._values_from_object(self), + com._values_from_object(other)) + except TypeError: + # e.g. fails in numpy 1.6 with DatetimeIndex #1681 + return False + + def _wrap_joined_index(self, joined, other): + name = self.name if self.name == other.name else None + return Int64Index(joined, name=name) + + +Int64Index._add_numeric_methods() +Int64Index._add_logical_methods() + + +class Float64Index(NumericIndex): + """ + Immutable ndarray implementing an ordered, sliceable set. The basic object + storing axis labels for all pandas objects. Float64Index is a special case + of `Index` with purely floating point labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype (default: object) + copy : bool + Make a copy of input ndarray + name : object + Name to be stored in the index + + Notes + ----- + An Float64Index instance can **only** contain hashable objects + """ + + _typ = 'float64index' + _engine_type = _index.Float64Engine + _groupby = _algos.groupby_float64 + _arrmap = _algos.arrmap_float64 + _left_indexer_unique = _algos.left_join_indexer_unique_float64 + _left_indexer = _algos.left_join_indexer_float64 + _inner_indexer = _algos.inner_join_indexer_float64 + _outer_indexer = _algos.outer_join_indexer_float64 + + def __new__(cls, data=None, dtype=None, copy=False, name=None, + fastpath=False, **kwargs): + + if fastpath: + return cls._simple_new(data, name) + + data = cls._coerce_to_ndarray(data) + + if issubclass(data.dtype.type, compat.string_types): + cls._string_data_error(data) + + if dtype is None: + dtype = np.float64 + + try: + subarr = np.array(data, dtype=dtype, copy=copy) + except: + raise TypeError('Unsafe NumPy casting, you must explicitly cast') + + # coerce to float64 for storage + if subarr.dtype != np.float64: + subarr = subarr.astype(np.float64) + + return cls._simple_new(subarr, name) + + @property + def inferred_type(self): + return 
'floating' + + def astype(self, dtype): + if np.dtype(dtype) not in (np.object, np.float64): + raise TypeError('Setting %s dtype to anything other than ' + 'float64 or object is not supported' % + self.__class__) + return Index(self._values, name=self.name, dtype=dtype) + + def _convert_scalar_indexer(self, key, kind=None): + """ + convert a scalar indexer + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + + right now we are converting + floats -> ints if the index supports it + """ + + if kind == 'iloc': + if com.is_integer(key): + return key + + return (super(Float64Index, self) + ._convert_scalar_indexer(key, kind=kind)) + + return key + + def _convert_slice_indexer(self, key, kind=None): + """ + convert a slice indexer, by definition these are labels + unless we are iloc + + Parameters + ---------- + key : label of the slice bound + kind : optional, type of the indexing operation (loc/ix/iloc/None) + """ + + # if we are not a slice, then we are done + if not isinstance(key, slice): + return key + + if kind == 'iloc': + return super(Float64Index, self)._convert_slice_indexer(key, + kind=kind) + + # translate to locations + return self.slice_indexer(key.start, key.stop, key.step) + + def _format_native_types(self, na_rep='', float_format=None, decimal='.', + quoting=None, **kwargs): + from pandas.core.format import FloatArrayFormatter + formatter = FloatArrayFormatter(self.values, na_rep=na_rep, + float_format=float_format, + decimal=decimal, quoting=quoting) + return formatter.get_formatted_data() + + def get_value(self, series, key): + """ we always want to get an index value, never a value """ + if not np.isscalar(key): + raise InvalidIndexError + + from pandas.core.indexing import maybe_droplevels + from pandas.core.series import Series + + k = com._values_from_object(key) + loc = self.get_loc(k) + new_values = com._values_from_object(series)[loc] + + if np.isscalar(new_values) 
or new_values is None: + return new_values + + new_index = self[loc] + new_index = maybe_droplevels(new_index, k) + return Series(new_values, index=new_index, name=series.name) + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if self is other: + return True + + # need to compare nans locations and make sure that they are the same + # since nans don't compare equal this is a bit tricky + try: + if not isinstance(other, Float64Index): + other = self._constructor(other) + if (not com.is_dtype_equal(self.dtype, other.dtype) or + self.shape != other.shape): + return False + left, right = self._values, other._values + return ((left == right) | (self._isnan & other._isnan)).all() + except TypeError: + # e.g. fails in numpy 1.6 with DatetimeIndex #1681 + return False + + def __contains__(self, other): + if super(Float64Index, self).__contains__(other): + return True + + try: + # if other is a sequence this throws a ValueError + return np.isnan(other) and self.hasnans + except ValueError: + try: + return len(other) <= 1 and ibase._try_get_item(other) in self + except TypeError: + return False + except: + return False + + def get_loc(self, key, method=None, tolerance=None): + try: + if np.all(np.isnan(key)): + nan_idxs = self._nan_idxs + try: + return nan_idxs.item() + except (ValueError, IndexError): + # should only need to catch ValueError here but on numpy + # 1.7 .item() can raise IndexError when NaNs are present + return nan_idxs + except (TypeError, NotImplementedError): + pass + return super(Float64Index, self).get_loc(key, method=method, + tolerance=tolerance) + + @property + def is_all_dates(self): + """ + Checks that all the labels are datetime objects + """ + return False + + @cache_readonly + def is_unique(self): + return super(Float64Index, self).is_unique and self._nan_idxs.size < 2 + + @Appender(Index.isin.__doc__) + def isin(self, values, level=None): + value_set = set(values) + if level is not None: + 
self._validate_index_level(level) + return lib.ismember_nans(np.array(self), value_set, + com.isnull(list(value_set)).any()) + + +Float64Index._add_numeric_methods() +Float64Index._add_logical_methods_disabled() diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py new file mode 100644 index 0000000000000..1b004a0034e7a --- /dev/null +++ b/pandas/indexes/range.py @@ -0,0 +1,623 @@ +from sys import getsizeof +import operator + +import numpy as np +import pandas.index as _index + +from pandas import compat +from pandas.compat import lrange +from pandas.indexes.base import Index +from pandas.util.decorators import Appender, cache_readonly +import pandas.core.common as com +import pandas.indexes.base as ibase + +from pandas.indexes.numeric import Int64Index + + +class RangeIndex(Int64Index): + + """ + Immutable Index implementing a monotonic range. RangeIndex is a + memory-saving special case of Int64Index limited to representing + monotonic ranges. + + Parameters + ---------- + start : int (default: 0) + stop : int (default: 0) + step : int (default: 1) + name : object, optional + Name to be stored in the index + copy : bool, default False + Make a copy of input if its a RangeIndex + + """ + + _typ = 'rangeindex' + _engine_type = _index.Int64Engine + + def __new__(cls, start=None, stop=None, step=None, name=None, dtype=None, + fastpath=False, copy=False, **kwargs): + + if fastpath: + return cls._simple_new(start, stop, step, name=name) + + cls._validate_dtype(dtype) + + # RangeIndex + if isinstance(start, RangeIndex): + if not copy: + return start + if name is None: + name = getattr(start, 'name', None) + start, stop, step = start._start, start._stop, start._step + + # validate the arguments + def _ensure_int(value, field): + try: + new_value = int(value) + except: + new_value = value + + if not com.is_integer(new_value) or new_value != value: + raise TypeError("RangeIndex(...) 
must be called with integers," + " {value} was passed for {field}".format( + value=type(value).__name__, + field=field) + ) + + return new_value + + if start is None: + start = 0 + else: + start = _ensure_int(start, 'start') + if stop is None: + stop = start + start = 0 + else: + stop = _ensure_int(stop, 'stop') + if step is None: + step = 1 + elif step == 0: + raise ValueError("Step must not be zero") + else: + step = _ensure_int(step, 'step') + + return cls._simple_new(start, stop, step, name) + + @classmethod + def from_range(cls, data, name=None, dtype=None, **kwargs): + """ create RangeIndex from a range (py3), or xrange (py2) object """ + if not isinstance(data, range): + raise TypeError( + '{0}(...) must be called with object coercible to a ' + 'range, {1} was passed'.format(cls.__name__, repr(data))) + + if compat.PY3: + step = data.step + stop = data.stop + start = data.start + else: + # seems we only have indexing ops to infer + # rather than direct accessors + if len(data) > 1: + step = data[1] - data[0] + stop = data[-1] + step + start = data[0] + elif len(data): + start = data[0] + stop = data[0] + 1 + step = 1 + else: + start = stop = 0 + step = 1 + return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) + + @classmethod + def _simple_new(cls, start, stop=None, step=None, name=None, + dtype=None, **kwargs): + result = object.__new__(cls) + + # handle passed None, non-integers + if start is None or not com.is_integer(start): + try: + return RangeIndex(start, stop, step, name=name, **kwargs) + except TypeError: + return Index(start, stop, step, name=name, **kwargs) + + result._start = start + result._stop = stop or 0 + result._step = step or 1 + result.name = name + for k, v in compat.iteritems(kwargs): + setattr(result, k, v) + + result._reset_identity() + return result + + @staticmethod + def _validate_dtype(dtype): + """ require dtype to be None or int64 """ + if not (dtype is None or com.is_int64_dtype(dtype)): + raise 
TypeError('Invalid to pass a non-int64 dtype to RangeIndex') + + @cache_readonly + def _constructor(self): + """ return the class to use for construction """ + return Int64Index + + @cache_readonly + def _data(self): + return np.arange(self._start, self._stop, self._step, dtype=np.int64) + + @cache_readonly + def _int64index(self): + return Int64Index(self._data, name=self.name, fastpath=True) + + def _get_data_as_items(self): + """ return a list of tuples of start, stop, step """ + return [('start', self._start), + ('stop', self._stop), + ('step', self._step)] + + def __reduce__(self): + d = self._get_attributes_dict() + d.update(dict(self._get_data_as_items())) + return ibase._new_Index, (self.__class__, d), None + + def _format_attrs(self): + """ + Return a list of tuples of the (attr, formatted_value) + """ + attrs = self._get_data_as_items() + if self.name is not None: + attrs.append(('name', ibase.default_pprint(self.name))) + return attrs + + def _format_data(self): + # we are formatting thru the attributes + return None + + @cache_readonly + def nbytes(self): + """ return the number of bytes in the underlying data """ + return sum([getsizeof(getattr(self, v)) for v in + ['_start', '_stop', '_step']]) + + def memory_usage(self, deep=False): + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self.nbytes + + @property + def dtype(self): + return np.dtype(np.int64) + + @property + def is_unique(self): + """ return if the index has unique values """ + return True + + @property + def has_duplicates(self): + return False + + def tolist(self): + return lrange(self._start, self._stop, self._step) + + def _shallow_copy(self, 
values=None, **kwargs): + """ create a new Index, don't copy the data, use the same object attributes + with passed in attributes taking precedence """ + if values is None: + return RangeIndex(name=self.name, fastpath=True, + **dict(self._get_data_as_items())) + else: + kwargs.setdefault('name', self.name) + return self._int64index._shallow_copy(values, **kwargs) + + @Appender(ibase._index_shared_docs['copy']) + def copy(self, name=None, deep=False, dtype=None, **kwargs): + self._validate_dtype(dtype) + if name is None: + name = self.name + return RangeIndex(name=name, fastpath=True, + **dict(self._get_data_as_items())) + + def argsort(self, *args, **kwargs): + """ + return an ndarray indexer of the underlying data + + See also + -------- + numpy.ndarray.argsort + """ + if self._step > 0: + return np.arange(len(self)) + else: + return np.arange(len(self) - 1, -1, -1) + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if isinstance(other, RangeIndex): + ls = len(self) + lo = len(other) + return (ls == lo == 0 or + ls == lo == 1 and + self._start == other._start or + ls == lo and + self._start == other._start and + self._step == other._step) + + return super(RangeIndex, self).equals(other) + + def intersection(self, other): + """ + Form the intersection of two Index objects. 
Sortedness of the result is + not guaranteed + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + intersection : Index + """ + if not isinstance(other, RangeIndex): + return super(RangeIndex, self).intersection(other) + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(min(self._start, self._stop + 1), + min(other._start, other._stop + 1)) + int_high = min(max(self._stop, self._start + 1), + max(other._stop, other._start + 1)) + if int_high <= int_low: + return RangeIndex() + + # Method hint: linear Diophantine equation + # solve intersection problem + # performance hint: for identical step sizes, could use + # cheaper alternative + gcd, s, t = self._extended_gcd(self._step, other._step) + + # check whether element sets intersect + if (self._start - other._start) % gcd: + return RangeIndex() + + # calculate parameters for the RangeIndex describing the + # intersection disregarding the lower bounds + tmp_start = self._start + (other._start - self._start) * \ + self._step // gcd * s + new_step = self._step * other._step // gcd + new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) + + # adjust index to limiting interval + new_index._start = new_index._min_fitting_element(int_low) + return new_index + + def _min_fitting_element(self, lower_limit): + """Returns the smallest element greater than or equal to the limit""" + no_steps = -(-(lower_limit - self._start) // abs(self._step)) + return self._start + abs(self._step) * no_steps + + def _max_fitting_element(self, upper_limit): + """Returns the largest element smaller than or equal to the limit""" + no_steps = (upper_limit - self._start) // abs(self._step) + return self._start + abs(self._step) * no_steps + + def _extended_gcd(self, a, b): + """ + Extended Euclidean algorithms to solve Bezout's identity: + a*x + b*y = gcd(x, y) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t 
= 1, 0 + r, old_r = b, a + while r: + quotient = old_r // r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def union(self, other): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + union : Index + """ + self._assert_can_do_setop(other) + if len(other) == 0 or self.equals(other): + return self + if len(self) == 0: + return other + if isinstance(other, RangeIndex): + start_s, step_s = self._start, self._step + end_s = self._start + self._step * (len(self) - 1) + start_o, step_o = other._start, other._step + end_o = other._start + other._step * (len(other) - 1) + if self._step < 0: + start_s, step_s, end_s = end_s, -step_s, start_s + if other._step < 0: + start_o, step_o, end_o = end_o, -step_o, start_o + if len(self) == 1 and len(other) == 1: + step_s = step_o = abs(self._start - other._start) + elif len(self) == 1: + step_s = step_o + elif len(other) == 1: + step_o = step_s + start_r = min(start_s, start_o) + end_r = max(end_s, end_o) + if step_o == step_s: + if ((start_s - start_o) % step_s == 0 and + (start_s - end_o) <= step_s and + (start_o - end_s) <= step_s): + return RangeIndex(start_r, end_r + step_s, step_s) + if ((step_s % 2 == 0) and + (abs(start_s - start_o) <= step_s / 2) and + (abs(end_s - end_o) <= step_s / 2)): + return RangeIndex(start_r, end_r + step_s / 2, step_s / 2) + elif step_o % step_s == 0: + if ((start_o - start_s) % step_s == 0 and + (start_o + step_s >= start_s) and + (end_o - step_s <= end_s)): + return RangeIndex(start_r, end_r + step_s, step_s) + elif step_s % step_o == 0: + if ((start_s - start_o) % step_o == 0 and + (start_s + step_o >= start_o) and + (end_s - step_o <= end_o)): + return RangeIndex(start_r, end_r + step_o, step_o) + + return self._int64index.union(other) + + def join(self, other, how='left', level=None, 
return_indexers=False): + """ + *this is an internal non-public method* + + Compute join_index and indexers to conform data + structures to the new index. + + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : boolean, default False + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + if how == 'outer' and self is not other: + # note: could return RangeIndex in more circumstances + return self._int64index.join(other, how, level, return_indexers) + + return super(RangeIndex, self).join(other, how, level, return_indexers) + + def __len__(self): + """ + return the length of the RangeIndex + """ + return max(0, -(-(self._stop - self._start) // self._step)) + + @property + def size(self): + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. + """ + super_getitem = super(RangeIndex, self).__getitem__ + + if np.isscalar(key): + n = int(key) + if n != key: + return super_getitem(key) + if n < 0: + n = len(self) + key + if n < 0 or n > len(self) - 1: + raise IndexError("index {key} is out of bounds for axis 0 " + "with size {size}".format(key=key, + size=len(self))) + return self._start + n * self._step + + if isinstance(key, slice): + + # This is basically PySlice_GetIndicesEx, but delegation to our + # super routines if we don't have integers + + l = len(self) + + # complete missing slice information + step = 1 if key.step is None else key.step + if key.start is None: + start = l - 1 if step < 0 else 0 + else: + start = key.start + + if start < 0: + start += l + if start < 0: + start = -1 if step < 0 else 0 + if start >= l: + start = l - 1 if step < 0 else l + + if key.stop is None: + stop = -1 if step < 0 else l + else: + stop = key.stop + + if stop < 0: + stop += l + if stop < 0: + stop = -1 + if stop > l: + stop = l + + # delegate non-integer slices + if (start != int(start) and + stop != int(stop) 
and + step != int(step)): + return super_getitem(key) + + # convert indexes to values + start = self._start + self._step * start + stop = self._start + self._step * stop + step = self._step * step + + return RangeIndex(start, stop, step, self.name, fastpath=True) + + # fall back to Int64Index + return super_getitem(key) + + def __floordiv__(self, other): + if com.is_integer(other): + if (len(self) == 0 or + self._start % other == 0 and + self._step % other == 0): + start = self._start // other + step = self._step // other + stop = start + len(self) * step + return RangeIndex(start, stop, step, name=self.name, + fastpath=True) + if len(self) == 1: + start = self._start // other + return RangeIndex(start, start + 1, 1, name=self.name, + fastpath=True) + return self._int64index // other + + @classmethod + def _add_numeric_methods_binary(cls): + """ add in numeric methods, specialized to RangeIndex """ + + def _make_evaluate_binop(op, opstr, reversed=False, step=False): + """ + Parameters + ---------- + op : callable that accepts 2 parms + perform the binary op + opstr : string + string name of ops + reversed : boolean, default False + if this is a reversed op, e.g. 
radd + step : callable, optional, default to False + op to apply to the step parm if not None + if False, use the existing step + """ + + def _evaluate_numeric_binop(self, other): + + other = self._validate_for_numeric_binop(other, op, opstr) + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + + if reversed: + self, other = other, self + + try: + # alppy if we have an override + if step: + rstep = step(self._step, other) + + # we don't have a representable op + # so return a base index + if not com.is_integer(rstep) or not rstep: + raise ValueError + + else: + rstep = self._step + + rstart = op(self._start, other) + rstop = op(self._stop, other) + + result = RangeIndex(rstart, + rstop, + rstep, + **attrs) + + # for compat with numpy / Int64Index + # even if we can represent as a RangeIndex, return + # as a Float64Index if we have float-like descriptors + if not all([com.is_integer(x) for x in + [rstart, rstop, rstep]]): + result = result.astype('float64') + + return result + + except (ValueError, TypeError, AttributeError): + pass + + # convert to Int64Index ops + if isinstance(self, RangeIndex): + self = self.values + if isinstance(other, RangeIndex): + other = other.values + + return Index(op(self, other), **attrs) + + return _evaluate_numeric_binop + + cls.__add__ = cls.__radd__ = _make_evaluate_binop( + operator.add, '__add__') + cls.__sub__ = _make_evaluate_binop(operator.sub, '__sub__') + cls.__rsub__ = _make_evaluate_binop( + operator.sub, '__sub__', reversed=True) + cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( + operator.mul, + '__mul__', + step=operator.mul) + cls.__truediv__ = _make_evaluate_binop( + operator.truediv, + '__truediv__', + step=operator.truediv) + cls.__rtruediv__ = _make_evaluate_binop( + operator.truediv, + '__truediv__', + reversed=True, + step=operator.truediv) + if not compat.PY3: + cls.__div__ = _make_evaluate_binop( + operator.div, + '__div__', + step=operator.div) + cls.__rdiv__ = 
_make_evaluate_binop( + operator.div, + '__div__', + reversed=True, + step=operator.div) + +RangeIndex._add_numeric_methods() +RangeIndex._add_logical_methods() From f73c17bb6ccf0bf3493e403addeef1b8d0006214 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 23 Jan 2016 21:40:51 -0800 Subject: [PATCH 2/8] Remove float.py --- pandas/indexes/float.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 pandas/indexes/float.py diff --git a/pandas/indexes/float.py b/pandas/indexes/float.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 From f464384dc898e71a721c4c5496c74f9907007b7f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 23 Jan 2016 22:51:32 -0800 Subject: [PATCH 3/8] Reorganize pandas/tests/test_index.py --- pandas/tests/indexes/__init__.py | 0 pandas/tests/indexes/common.py | 678 ++ .../{ => indexes}/data/mindex_073.pickle | Bin .../tests/{ => indexes}/data/s1-0.12.0.pickle | Bin .../tests/{ => indexes}/data/s2-0.12.0.pickle | Bin pandas/tests/indexes/test_base.py | 1490 ++++ pandas/tests/indexes/test_category.py | 662 ++ pandas/tests/indexes/test_datetimelike.py | 855 ++ pandas/tests/indexes/test_multi.py | 1949 +++++ pandas/tests/indexes/test_numeric.py | 824 ++ pandas/tests/indexes/test_range.py | 806 ++ pandas/tests/test_index.py | 7146 ----------------- setup.py | 3 + 13 files changed, 7267 insertions(+), 7146 deletions(-) create mode 100644 pandas/tests/indexes/__init__.py create mode 100644 pandas/tests/indexes/common.py rename pandas/tests/{ => indexes}/data/mindex_073.pickle (100%) rename pandas/tests/{ => indexes}/data/s1-0.12.0.pickle (100%) rename pandas/tests/{ => indexes}/data/s2-0.12.0.pickle (100%) create mode 100644 pandas/tests/indexes/test_base.py create mode 100644 pandas/tests/indexes/test_category.py create mode 100644 pandas/tests/indexes/test_datetimelike.py create mode 100644 pandas/tests/indexes/test_multi.py create mode 100644 pandas/tests/indexes/test_numeric.py create mode 100644 
pandas/tests/indexes/test_range.py delete mode 100644 pandas/tests/test_index.py diff --git a/pandas/tests/indexes/__init__.py b/pandas/tests/indexes/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py new file mode 100644 index 0000000000000..fbe4fb39e062e --- /dev/null +++ b/pandas/tests/indexes/common.py @@ -0,0 +1,678 @@ +# -*- coding: utf-8 -*- + +from pandas import compat +from pandas.compat import PY3 + +import numpy as np + +from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex, + MultiIndex, CategoricalIndex, DatetimeIndex, + TimedeltaIndex, PeriodIndex) +from pandas.util.testing import assertRaisesRegexp + +import pandas.util.testing as tm + +import pandas as pd + +if PY3: + unicode = lambda x: x + + +class Base(object): + """ base class for index sub-class tests """ + _holder = None + _compat_props = ['shape', 'ndim', 'size', 'itemsize', 'nbytes'] + + # @staticmethod + # def setup_indices(cls, indices): + # # setup the test indices in the self.indices dict + # def make_accessor(x): + # @property + # def accessor(self): + # key = '__cached_{0}'.format(x) + # if hasattr(self, key): + # return getattr(self, key) + # else: + # result = self.indices[x].copy(deep=True) + # setattr(self, key, result) + # return result + + # return accessor + + # for name in indices: + # setattr(cls, name, make_accessor(name)) + + def setup_indices(self): + for name, idx in self.indices.items(): + setattr(self, name, idx) + + def verify_pickle(self, index): + unpickled = self.round_trip_pickle(index) + self.assertTrue(index.equals(unpickled)) + + def test_pickle_compat_construction(self): + # this is testing for pickle compat + if self._holder is None: + return + + # need an object to create with + self.assertRaises(TypeError, self._holder) + + def test_shift(self): + + # GH8083 test the base class for shift + idx = self.create_index() + 
self.assertRaises(NotImplementedError, idx.shift, 1) + self.assertRaises(NotImplementedError, idx.shift, 1, 2) + + def test_create_index_existing_name(self): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + expected = self.create_index() + if not isinstance(expected, MultiIndex): + expected.name = 'foo' + result = pd.Index(expected) + tm.assert_index_equal(result, expected) + + result = pd.Index(expected, name='bar') + expected.name = 'bar' + tm.assert_index_equal(result, expected) + else: + expected.names = ['foo', 'bar'] + result = pd.Index(expected) + tm.assert_index_equal( + result, Index(Index([('foo', 'one'), ('foo', 'two'), + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], + dtype='object'), + names=['foo', 'bar'])) + + result = pd.Index(expected, names=['A', 'B']) + tm.assert_index_equal( + result, + Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], + dtype='object'), names=['A', 'B'])) + + def test_numeric_compat(self): + + idx = self.create_index() + tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", + lambda: idx * 1) + tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", + lambda: 1 * idx) + + div_err = "cannot perform __truediv__" if PY3 \ + else "cannot perform __div__" + tm.assertRaisesRegexp(TypeError, div_err, lambda: idx / 1) + tm.assertRaisesRegexp(TypeError, div_err, lambda: 1 / idx) + tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", + lambda: idx // 1) + tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", + lambda: 1 // idx) + + def test_logical_compat(self): + idx = self.create_index() + tm.assertRaisesRegexp(TypeError, 'cannot perform all', + lambda: idx.all()) + tm.assertRaisesRegexp(TypeError, 'cannot perform any', + lambda: idx.any()) + + def test_boolean_context_compat(self): + + # boolean context compat + idx = 
self.create_index() + + def f(): + if idx: + pass + + tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) + + def test_reindex_base(self): + idx = self.create_index() + expected = np.arange(idx.size) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): + idx.get_indexer(idx, method='invalid') + + def test_ndarray_compat_properties(self): + + idx = self.create_index() + self.assertTrue(idx.T.equals(idx)) + self.assertTrue(idx.transpose().equals(idx)) + + values = idx.values + for prop in self._compat_props: + self.assertEqual(getattr(idx, prop), getattr(values, prop)) + + # test for validity + idx.nbytes + idx.values.nbytes + + def test_repr_roundtrip(self): + + idx = self.create_index() + tm.assert_index_equal(eval(repr(idx)), idx) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = 'foo' + self.assertTrue("'foo'" in str(idx)) + self.assertTrue(idx.__class__.__name__ in str(idx)) + + def test_dtype_str(self): + for idx in self.indices.values(): + dtype = idx.dtype_str + self.assertIsInstance(dtype, compat.string_types) + if isinstance(idx, PeriodIndex): + self.assertEqual(dtype, 'period') + else: + self.assertEqual(dtype, str(idx.dtype)) + + def test_repr_max_seq_item_setting(self): + # GH10182 + idx = self.create_index() + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + self.assertFalse('...' 
in str(idx)) + + def test_wrong_number_names(self): + def testit(ind): + ind.names = ["apple", "banana", "carrot"] + + for ind in self.indices.values(): + assertRaisesRegexp(ValueError, "^Length", testit, ind) + + def test_set_name_methods(self): + new_name = "This is the new name for this index" + for ind in self.indices.values(): + + # don't tests a MultiIndex here (as its tested separated) + if isinstance(ind, MultiIndex): + continue + + original_name = ind.name + new_ind = ind.set_names([new_name]) + self.assertEqual(new_ind.name, new_name) + self.assertEqual(ind.name, original_name) + res = ind.rename(new_name, inplace=True) + + # should return None + self.assertIsNone(res) + self.assertEqual(ind.name, new_name) + self.assertEqual(ind.names, [new_name]) + # with assertRaisesRegexp(TypeError, "list-like"): + # # should still fail even if it would be the right length + # ind.set_names("a") + with assertRaisesRegexp(ValueError, "Level must be None"): + ind.set_names("a", level=0) + + # rename in place just leaves tuples and other containers alone + name = ('A', 'B') + ind.rename(name, inplace=True) + self.assertEqual(ind.name, name) + self.assertEqual(ind.names, [name]) + + def test_hash_error(self): + for ind in self.indices.values(): + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % + type(ind).__name__): + hash(ind) + + def test_copy_and_deepcopy(self): + from copy import copy, deepcopy + + for ind in self.indices.values(): + + # don't tests a MultiIndex here (as its tested separated) + if isinstance(ind, MultiIndex): + continue + + for func in (copy, deepcopy): + idx_copy = func(ind) + self.assertIsNot(idx_copy, ind) + self.assertTrue(idx_copy.equals(ind)) + + new_copy = ind.copy(deep=True, name="banana") + self.assertEqual(new_copy.name, "banana") + + def test_duplicates(self): + for ind in self.indices.values(): + + if not len(ind): + continue + if isinstance(ind, MultiIndex): + continue + idx = self._holder([ind[0]] * 5) + 
self.assertFalse(idx.is_unique) + self.assertTrue(idx.has_duplicates) + + # GH 10115 + # preserve names + idx.name = 'foo' + result = idx.drop_duplicates() + self.assertEqual(result.name, 'foo') + self.assert_index_equal(result, Index([ind[0]], name='foo')) + + def test_sort(self): + for ind in self.indices.values(): + self.assertRaises(TypeError, ind.sort) + + def test_order(self): + for ind in self.indices.values(): + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + ind.order() + + def test_mutability(self): + for ind in self.indices.values(): + if not len(ind): + continue + self.assertRaises(TypeError, ind.__setitem__, 0, ind[0]) + + def test_view(self): + for ind in self.indices.values(): + i_view = ind.view() + self.assertEqual(i_view.name, ind.name) + + def test_compat(self): + for ind in self.indices.values(): + self.assertEqual(ind.tolist(), list(ind)) + + def test_argsort(self): + for k, ind in self.indices.items(): + + # sep teststed + if k in ['catIndex']: + continue + + result = ind.argsort() + expected = np.array(ind).argsort() + tm.assert_numpy_array_equal(result, expected) + + def test_pickle(self): + for ind in self.indices.values(): + self.verify_pickle(ind) + ind.name = 'foo' + self.verify_pickle(ind) + + def test_take(self): + indexer = [4, 3, 0, 2] + for k, ind in self.indices.items(): + + # separate + if k in ['boolIndex', 'tuples', 'empty']: + continue + + result = ind.take(indexer) + expected = ind[indexer] + self.assertTrue(result.equals(expected)) + + if not isinstance(ind, + (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + with tm.assertRaises(AttributeError): + ind.freq + + def test_setops_errorcases(self): + for name, idx in compat.iteritems(self.indices): + # # non-iterable input + cases = [0.5, 'xxx'] + methods = [idx.intersection, idx.union, idx.difference, + idx.sym_diff] + + for method in methods: + for case in cases: + assertRaisesRegexp(TypeError, + "Input must be Index or array-like", + method, 
case) + + def test_intersection_base(self): + for name, idx in compat.iteritems(self.indices): + first = idx[:5] + second = idx[:3] + intersect = first.intersection(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + self.assertTrue(tm.equalContents(intersect, second)) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assertRaisesRegexp(ValueError, msg): + result = first.intersection(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.intersection(case) + self.assertTrue(tm.equalContents(result, second)) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assertRaisesRegexp(TypeError, msg): + result = first.intersection([1, 2, 3]) + + def test_union_base(self): + for name, idx in compat.iteritems(self.indices): + first = idx[3:] + second = idx[:5] + everything = idx + union = first.union(second) + self.assertTrue(tm.equalContents(union, everything)) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assertRaisesRegexp(ValueError, msg): + result = first.union(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.union(case) + self.assertTrue(tm.equalContents(result, everything)) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assertRaisesRegexp(TypeError, msg): + result = first.union([1, 2, 3]) + + def test_difference_base(self): + for name, idx in compat.iteritems(self.indices): + first = idx[2:] + second = idx[:4] + answer = idx[4:] + result = first.difference(second) + + if isinstance(idx, CategoricalIndex): + pass + else: + self.assertTrue(tm.equalContents(result, answer)) 
+ + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assertRaisesRegexp(ValueError, msg): + result = first.difference(case) + elif isinstance(idx, CategoricalIndex): + pass + elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + self.assertEqual(result.__class__, answer.__class__) + tm.assert_numpy_array_equal(result.asi8, answer.asi8) + else: + result = first.difference(case) + self.assertTrue(tm.equalContents(result, answer)) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assertRaisesRegexp(TypeError, msg): + result = first.difference([1, 2, 3]) + + def test_symmetric_diff(self): + for name, idx in compat.iteritems(self.indices): + first = idx[1:] + second = idx[:-1] + if isinstance(idx, CategoricalIndex): + pass + else: + answer = idx[[0, -1]] + result = first.sym_diff(second) + self.assertTrue(tm.equalContents(result, answer)) + + # GH 10149 + cases = [klass(second.values) + for klass in [np.array, Series, list]] + for case in cases: + if isinstance(idx, PeriodIndex): + msg = "can only call with other PeriodIndex-ed objects" + with tm.assertRaisesRegexp(ValueError, msg): + result = first.sym_diff(case) + elif isinstance(idx, CategoricalIndex): + pass + else: + result = first.sym_diff(case) + self.assertTrue(tm.equalContents(result, answer)) + + if isinstance(idx, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with tm.assertRaisesRegexp(TypeError, msg): + result = first.sym_diff([1, 2, 3]) + + def test_insert_base(self): + + for name, idx in compat.iteritems(self.indices): + result = idx[1:4] + + if not len(idx): + continue + + # test 0th element + self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) + + def test_delete_base(self): + + for name, idx in compat.iteritems(self.indices): + + if not len(idx): + continue + 
+ if isinstance(idx, RangeIndex): + # tested in class + continue + + expected = idx[1:] + result = idx.delete(0) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + expected = idx[:-1] + result = idx.delete(-1) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + with tm.assertRaises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_equals_op(self): + # GH9947, GH10637 + index_a = self.create_index() + if isinstance(index_a, PeriodIndex): + return + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + index_a == index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + index_a == series_b + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + index_a == index_d + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + index_a == series_d + 
with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + index_a == array_d + with tm.assertRaisesRegexp(ValueError, "Series lengths must match"): + series_a == series_d + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_numpy_array_equal(series_a == item, expected3) + + def test_numpy_ufuncs(self): + # test ufuncs of numpy 1.9.2. see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + # some functions are skipped because it may return different result + # for unicode input depending on numpy version + + for name, idx in compat.iteritems(self.indices): + for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg]: + if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + # PeriodIndex behavior should be changed in future version + with tm.assertRaises(Exception): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index)): + # coerces to float (e.g. 
np.sin) + result = func(idx) + exp = Index(func(idx.values), name=idx.name) + self.assert_index_equal(result, exp) + self.assertIsInstance(result, pd.Float64Index) + else: + # raise AttributeError or TypeError + if len(idx) == 0: + continue + else: + with tm.assertRaises(Exception): + func(idx) + + for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: + if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + with tm.assertRaises(Exception): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index)): + # results in bool array + result = func(idx) + exp = func(idx.values) + self.assertIsInstance(result, np.ndarray) + tm.assertNotIsInstance(result, Index) + else: + if len(idx) == 0: + continue + else: + with tm.assertRaises(Exception): + func(idx) + + def test_hasnans_isnans(self): + # GH 11343, added tests for hasnans / isnans + for name, index in self.indices.items(): + if isinstance(index, MultiIndex): + pass + else: + idx = index.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(idx), dtype=bool) + self.assert_numpy_array_equal(idx._isnan, expected) + self.assertFalse(idx.hasnans) + + idx = index.copy() + values = idx.values + + if len(index) == 0: + continue + elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): + values[1] = pd.tslib.iNaT + elif isinstance(index, Int64Index): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + self.assert_numpy_array_equal(idx._isnan, expected) + self.assertTrue(idx.hasnans) + + def test_fillna(self): + # GH 11343 + for name, index in self.indices.items(): + if len(index) == 0: + pass + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isnull is not defined for MultiIndex" + with 
self.assertRaisesRegexp(NotImplementedError, msg): + idx.fillna(idx[0]) + else: + idx = index.copy() + result = idx.fillna(idx[0]) + self.assert_index_equal(result, idx) + self.assertFalse(result is idx) + + msg = "'value' must be a scalar, passed: " + with self.assertRaisesRegexp(TypeError, msg): + idx.fillna([idx[0]]) + + idx = index.copy() + values = idx.values + + if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): + values[1] = pd.tslib.iNaT + elif isinstance(index, Int64Index): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = index.__class__(values, freq=index.freq) + else: + idx = index.__class__(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + self.assert_numpy_array_equal(idx._isnan, expected) + self.assertTrue(idx.hasnans) diff --git a/pandas/tests/data/mindex_073.pickle b/pandas/tests/indexes/data/mindex_073.pickle similarity index 100% rename from pandas/tests/data/mindex_073.pickle rename to pandas/tests/indexes/data/mindex_073.pickle diff --git a/pandas/tests/data/s1-0.12.0.pickle b/pandas/tests/indexes/data/s1-0.12.0.pickle similarity index 100% rename from pandas/tests/data/s1-0.12.0.pickle rename to pandas/tests/indexes/data/s1-0.12.0.pickle diff --git a/pandas/tests/data/s2-0.12.0.pickle b/pandas/tests/indexes/data/s2-0.12.0.pickle similarity index 100% rename from pandas/tests/data/s2-0.12.0.pickle rename to pandas/tests/indexes/data/s2-0.12.0.pickle diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py new file mode 100644 index 0000000000000..9d97b98b5a13e --- /dev/null +++ b/pandas/tests/indexes/test_base.py @@ -0,0 +1,1490 @@ +from datetime import datetime, timedelta + +# TODO(wesm): fix long line flake8 issues +# flake8: noqa + +import pandas.util.testing as tm +from pandas.indexes.api import Index, MultiIndex +from .common import Base + +from pandas.compat import (is_platform_windows, range, lrange, lzip, u, + zip, PY3) +import 
operator +import os + +import numpy as np + +from pandas import (period_range, date_range, Series, + Float64Index, Int64Index, + CategoricalIndex, DatetimeIndex, TimedeltaIndex, + PeriodIndex) +from pandas.util.testing import assert_almost_equal + +import pandas.core.config as cf + +from pandas.tseries.index import _to_m8 + +import pandas as pd +from pandas.lib import Timestamp + +if PY3: + unicode = lambda x: x + + +class TestIndex(Base, tm.TestCase): + _holder = Index + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), + strIndex=tm.makeStringIndex(100), + dateIndex=tm.makeDateIndex(100), + periodIndex=tm.makePeriodIndex(100), + tdIndex=tm.makeTimedeltaIndex(100), + intIndex=tm.makeIntIndex(100), + rangeIndex=tm.makeIntIndex(100), + floatIndex=tm.makeFloatIndex(100), + boolIndex=Index([True, False]), + catIndex=tm.makeCategoricalIndex(100), + empty=Index([]), + tuples=MultiIndex.from_tuples(lzip( + ['foo', 'bar', 'baz'], [1, 2, 3]))) + self.setup_indices() + + def create_index(self): + return Index(list('abcde')) + + def test_new_axis(self): + new_index = self.dateIndex[None, :] + self.assertEqual(new_index.ndim, 2) + tm.assertIsInstance(new_index, np.ndarray) + + def test_copy_and_deepcopy(self): + super(TestIndex, self).test_copy_and_deepcopy() + + new_copy2 = self.intIndex.copy(dtype=int) + self.assertEqual(new_copy2.dtype.kind, 'i') + + def test_constructor(self): + # regular instance creation + tm.assert_contains_all(self.strIndex, self.strIndex) + tm.assert_contains_all(self.dateIndex, self.dateIndex) + + # casting + arr = np.array(self.strIndex) + index = Index(arr) + tm.assert_contains_all(arr, index) + tm.assert_numpy_array_equal(self.strIndex, index) + + # copy + arr = np.array(self.strIndex) + index = Index(arr, copy=True, name='name') + tm.assertIsInstance(index, Index) + self.assertEqual(index.name, 'name') + tm.assert_numpy_array_equal(arr, index) + arr[0] = "SOMEBIGLONGSTRING" + 
self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") + + # what to do here? + # arr = np.array(5.) + # self.assertRaises(Exception, arr.view, Index) + + def test_constructor_corner(self): + # corner case + self.assertRaises(TypeError, Index, 0) + + def test_construction_list_mixed_tuples(self): + # 10697 + # if we are constructing from a mixed list of tuples, make sure that we + # are independent of the sorting order + idx1 = Index([('A', 1), 'B']) + self.assertIsInstance(idx1, Index) and self.assertNotInstance( + idx1, MultiIndex) + idx2 = Index(['B', ('A', 1)]) + self.assertIsInstance(idx2, Index) and self.assertNotInstance( + idx2, MultiIndex) + + def test_constructor_from_series(self): + + expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'), + Timestamp('20130101')]) + s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp( + '20130101')]) + result = Index(s) + self.assertTrue(result.equals(expected)) + result = DatetimeIndex(s) + self.assertTrue(result.equals(expected)) + + # GH 6273 + # create from a series, passing a freq + s = Series(pd.to_datetime(['1-1-1990', '2-1-1990', '3-1-1990', + '4-1-1990', '5-1-1990'])) + result = DatetimeIndex(s, freq='MS') + expected = DatetimeIndex( + ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990' + ], freq='MS') + self.assertTrue(result.equals(expected)) + + df = pd.DataFrame(np.random.rand(5, 3)) + df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', + '5-1-1990'] + result = DatetimeIndex(df['date'], freq='MS') + self.assertTrue(result.equals(expected)) + self.assertEqual(df['date'].dtype, object) + + exp = pd.Series( + ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990' + ], name='date') + self.assert_series_equal(df['date'], exp) + + # GH 6274 + # infer freq of same + result = pd.infer_freq(df['date']) + self.assertEqual(result, 'MS') + + def test_constructor_ndarray_like(self): + # GH 5460#issuecomment-44474502 + # it should be possible to convert any object that 
satisfies the numpy + # ndarray interface directly into an Index + class ArrayLike(object): + + def __init__(self, array): + self.array = array + + def __array__(self, dtype=None): + return self.array + + for array in [np.arange(5), np.array(['a', 'b', 'c']), + date_range('2000-01-01', periods=3).values]: + expected = pd.Index(array) + result = pd.Index(ArrayLike(array)) + self.assertTrue(result.equals(expected)) + + def test_index_ctor_infer_periodindex(self): + xp = period_range('2012-1-1', freq='M', periods=3) + rs = Index(xp) + tm.assert_numpy_array_equal(rs, xp) + tm.assertIsInstance(rs, PeriodIndex) + + def test_constructor_simple_new(self): + idx = Index([1, 2, 3, 4, 5], name='int') + result = idx._simple_new(idx, 'int') + self.assertTrue(result.equals(idx)) + + idx = Index([1.1, np.nan, 2.2, 3.0], name='float') + result = idx._simple_new(idx, 'float') + self.assertTrue(result.equals(idx)) + + idx = Index(['A', 'B', 'C', np.nan], name='obj') + result = idx._simple_new(idx, 'obj') + self.assertTrue(result.equals(idx)) + + def test_constructor_dtypes(self): + + for idx in [Index(np.array([1, 2, 3], dtype=int)), Index( + np.array( + [1, 2, 3], dtype=int), dtype=int), Index( + np.array( + [1., 2., 3.], dtype=float), dtype=int), Index( + [1, 2, 3], dtype=int), Index( + [1., 2., 3.], dtype=int)]: + self.assertIsInstance(idx, Int64Index) + + for idx in [Index(np.array([1., 2., 3.], dtype=float)), Index( + np.array( + [1, 2, 3], dtype=int), dtype=float), Index( + np.array( + [1., 2., 3.], dtype=float), dtype=float), Index( + [1, 2, 3], dtype=float), Index( + [1., 2., 3.], dtype=float)]: + self.assertIsInstance(idx, Float64Index) + + for idx in [Index(np.array( + [True, False, True], dtype=bool)), Index([True, False, True]), + Index( + np.array( + [True, False, True], dtype=bool), dtype=bool), + Index( + [True, False, True], dtype=bool)]: + self.assertIsInstance(idx, Index) + self.assertEqual(idx.dtype, object) + + for idx in [Index( + np.array([1, 2, 3], dtype=int), 
dtype='category'), Index( + [1, 2, 3], dtype='category'), Index( + np.array([np.datetime64('2011-01-01'), np.datetime64( + '2011-01-02')]), dtype='category'), Index( + [datetime(2011, 1, 1), datetime(2011, 1, 2) + ], dtype='category')]: + self.assertIsInstance(idx, CategoricalIndex) + + for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64( + '2011-01-02')])), + Index([datetime(2011, 1, 1), datetime(2011, 1, 2)])]: + self.assertIsInstance(idx, DatetimeIndex) + + for idx in [Index( + np.array([np.datetime64('2011-01-01'), np.datetime64( + '2011-01-02')]), dtype=object), Index( + [datetime(2011, 1, 1), datetime(2011, 1, 2) + ], dtype=object)]: + self.assertNotIsInstance(idx, DatetimeIndex) + self.assertIsInstance(idx, Index) + self.assertEqual(idx.dtype, object) + + for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64( + 1, 'D')])), Index([timedelta(1), timedelta(1)])]: + self.assertIsInstance(idx, TimedeltaIndex) + + for idx in [Index( + np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), + dtype=object), Index( + [timedelta(1), timedelta(1)], dtype=object)]: + self.assertNotIsInstance(idx, TimedeltaIndex) + self.assertIsInstance(idx, Index) + self.assertEqual(idx.dtype, object) + + def test_view_with_args(self): + + restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', + 'empty'] + + for i in restricted: + ind = self.indices[i] + + # with arguments + self.assertRaises(TypeError, lambda: ind.view('i8')) + + # these are ok + for i in list(set(self.indices.keys()) - set(restricted)): + ind = self.indices[i] + + # with arguments + ind.view('i8') + + def test_legacy_pickle_identity(self): + + # GH 8431 + pth = tm.get_data_path() + s1 = pd.read_pickle(os.path.join(pth, 's1-0.12.0.pickle')) + s2 = pd.read_pickle(os.path.join(pth, 's2-0.12.0.pickle')) + self.assertFalse(s1.index.identical(s2.index)) + self.assertFalse(s1.index.equals(s2.index)) + + def test_astype(self): + casted = self.intIndex.astype('i8') + + # it 
works! + casted.get_loc(5) + + # pass on name + self.intIndex.name = 'foobar' + casted = self.intIndex.astype('i8') + self.assertEqual(casted.name, 'foobar') + + def test_equals(self): + # same + self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c']))) + + # different length + self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b']))) + + # same length, different values + self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'd']))) + + # Must also be an Index + self.assertFalse(Index(['a', 'b', 'c']).equals(['a', 'b', 'c'])) + + def test_insert(self): + + # GH 7256 + # validate neg/pos inserts + result = Index(['b', 'c', 'd']) + + # test 0th element + self.assertTrue(Index(['a', 'b', 'c', 'd']).equals(result.insert(0, + 'a'))) + + # test Nth element that follows Python list behavior + self.assertTrue(Index(['b', 'c', 'e', 'd']).equals(result.insert(-1, + 'e'))) + + # test loc +/- neq (0, -1) + self.assertTrue(result.insert(1, 'z').equals(result.insert(-2, 'z'))) + + # test empty + null_index = Index([]) + self.assertTrue(Index(['a']).equals(null_index.insert(0, 'a'))) + + def test_delete(self): + idx = Index(['a', 'b', 'c', 'd'], name='idx') + + expected = Index(['b', 'c', 'd'], name='idx') + result = idx.delete(0) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + expected = Index(['a', 'b', 'c'], name='idx') + result = idx.delete(-1) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + with tm.assertRaises((IndexError, ValueError)): + # either depeidnig on numpy version + result = idx.delete(5) + + def test_identical(self): + + # index + i1 = Index(['a', 'b', 'c']) + i2 = Index(['a', 'b', 'c']) + + self.assertTrue(i1.identical(i2)) + + i1 = i1.rename('foo') + self.assertTrue(i1.equals(i2)) + self.assertFalse(i1.identical(i2)) + + i2 = i2.rename('foo') + self.assertTrue(i1.identical(i2)) + + i3 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')]) + i4 = 
Index([('a', 'a'), ('a', 'b'), ('b', 'a')], tupleize_cols=False) + self.assertFalse(i3.identical(i4)) + + def test_is_(self): + ind = Index(range(10)) + self.assertTrue(ind.is_(ind)) + self.assertTrue(ind.is_(ind.view().view().view().view())) + self.assertFalse(ind.is_(Index(range(10)))) + self.assertFalse(ind.is_(ind.copy())) + self.assertFalse(ind.is_(ind.copy(deep=False))) + self.assertFalse(ind.is_(ind[:])) + self.assertFalse(ind.is_(ind.view(np.ndarray).view(Index))) + self.assertFalse(ind.is_(np.array(range(10)))) + + # quasi-implementation dependent + self.assertTrue(ind.is_(ind.view())) + ind2 = ind.view() + ind2.name = 'bob' + self.assertTrue(ind.is_(ind2)) + self.assertTrue(ind2.is_(ind)) + # doesn't matter if Indices are *actually* views of underlying data, + self.assertFalse(ind.is_(Index(ind.values))) + arr = np.array(range(1, 11)) + ind1 = Index(arr, copy=False) + ind2 = Index(arr, copy=False) + self.assertFalse(ind1.is_(ind2)) + + def test_asof(self): + d = self.dateIndex[0] + self.assertEqual(self.dateIndex.asof(d), d) + self.assertTrue(np.isnan(self.dateIndex.asof(d - timedelta(1)))) + + d = self.dateIndex[-1] + self.assertEqual(self.dateIndex.asof(d + timedelta(1)), d) + + d = self.dateIndex[0].to_datetime() + tm.assertIsInstance(self.dateIndex.asof(d), Timestamp) + + def test_asof_datetime_partial(self): + idx = pd.date_range('2010-01-01', periods=2, freq='m') + expected = Timestamp('2010-02-28') + result = idx.asof('2010-02') + self.assertEqual(result, expected) + self.assertFalse(isinstance(result, Index)) + + def test_nanosecond_index_access(self): + s = Series([Timestamp('20130101')]).values.view('i8')[0] + r = DatetimeIndex([s + 50 + i for i in range(100)]) + x = Series(np.random.randn(100), index=r) + + first_value = x.asof(x.index[0]) + + # this does not yet work, as parsing strings is done via dateutil + # self.assertEqual(first_value, + # x['2013-01-01 00:00:00.000000050+0000']) + + self.assertEqual( + first_value, + 
x[Timestamp(np.datetime64('2013-01-01 00:00:00.000000050+0000', + 'ns'))]) + + def test_comparators(self): + index = self.dateIndex + element = index[len(index) // 2] + element = _to_m8(element) + + arr = np.array(index) + + def _check(op): + arr_result = op(arr, element) + index_result = op(index, element) + + self.assertIsInstance(index_result, np.ndarray) + tm.assert_numpy_array_equal(arr_result, index_result) + + _check(operator.eq) + _check(operator.ne) + _check(operator.gt) + _check(operator.lt) + _check(operator.ge) + _check(operator.le) + + def test_booleanindex(self): + boolIdx = np.repeat(True, len(self.strIndex)).astype(bool) + boolIdx[5:30:2] = False + + subIndex = self.strIndex[boolIdx] + + for i, val in enumerate(subIndex): + self.assertEqual(subIndex.get_loc(val), i) + + subIndex = self.strIndex[list(boolIdx)] + for i, val in enumerate(subIndex): + self.assertEqual(subIndex.get_loc(val), i) + + def test_fancy(self): + sl = self.strIndex[[1, 2, 3]] + for i in sl: + self.assertEqual(i, sl[sl.get_loc(i)]) + + def test_empty_fancy(self): + empty_farr = np.array([], dtype=np.float_) + empty_iarr = np.array([], dtype=np.int_) + empty_barr = np.array([], dtype=np.bool_) + + # pd.DatetimeIndex is excluded, because it overrides getitem and should + # be tested separately. + for idx in [self.strIndex, self.intIndex, self.floatIndex]: + empty_idx = idx.__class__([]) + + self.assertTrue(idx[[]].identical(empty_idx)) + self.assertTrue(idx[empty_iarr].identical(empty_idx)) + self.assertTrue(idx[empty_barr].identical(empty_idx)) + + # np.ndarray only accepts ndarray of int & bool dtypes, so should + # Index. 
+ self.assertRaises(IndexError, idx.__getitem__, empty_farr) + + def test_getitem(self): + arr = np.array(self.dateIndex) + exp = self.dateIndex[5] + exp = _to_m8(exp) + + self.assertEqual(exp, arr[5]) + + def test_intersection(self): + first = self.strIndex[:20] + second = self.strIndex[:10] + intersect = first.intersection(second) + self.assertTrue(tm.equalContents(intersect, second)) + + # Corner cases + inter = first.intersection(first) + self.assertIs(inter, first) + + idx1 = Index([1, 2, 3, 4, 5], name='idx') + # if target has the same name, it is preserved + idx2 = Index([3, 4, 5, 6, 7], name='idx') + expected2 = Index([3, 4, 5], name='idx') + result2 = idx1.intersection(idx2) + self.assertTrue(result2.equals(expected2)) + self.assertEqual(result2.name, expected2.name) + + # if target name is different, it will be reset + idx3 = Index([3, 4, 5, 6, 7], name='other') + expected3 = Index([3, 4, 5], name=None) + result3 = idx1.intersection(idx3) + self.assertTrue(result3.equals(expected3)) + self.assertEqual(result3.name, expected3.name) + + # non monotonic + idx1 = Index([5, 3, 2, 4, 1], name='idx') + idx2 = Index([4, 7, 6, 5, 3], name='idx') + result2 = idx1.intersection(idx2) + self.assertTrue(tm.equalContents(result2, expected2)) + self.assertEqual(result2.name, expected2.name) + + idx3 = Index([4, 7, 6, 5, 3], name='other') + result3 = idx1.intersection(idx3) + self.assertTrue(tm.equalContents(result3, expected3)) + self.assertEqual(result3.name, expected3.name) + + # non-monotonic non-unique + idx1 = Index(['A', 'B', 'A', 'C']) + idx2 = Index(['B', 'D']) + expected = Index(['B'], dtype='object') + result = idx1.intersection(idx2) + self.assertTrue(result.equals(expected)) + + def test_union(self): + first = self.strIndex[5:20] + second = self.strIndex[:10] + everything = self.strIndex[:20] + union = first.union(second) + self.assertTrue(tm.equalContents(union, everything)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, 
list]] + for case in cases: + result = first.union(case) + self.assertTrue(tm.equalContents(result, everything)) + + # Corner cases + union = first.union(first) + self.assertIs(union, first) + + union = first.union([]) + self.assertIs(union, first) + + union = Index([]).union(first) + self.assertIs(union, first) + + # preserve names + first.name = 'A' + second.name = 'A' + union = first.union(second) + self.assertEqual(union.name, 'A') + + second.name = 'B' + union = first.union(second) + self.assertIsNone(union.name) + + def test_add(self): + + # - API change GH 8226 + with tm.assert_produces_warning(): + self.strIndex + self.strIndex + with tm.assert_produces_warning(): + self.strIndex + self.strIndex.tolist() + with tm.assert_produces_warning(): + self.strIndex.tolist() + self.strIndex + + with tm.assert_produces_warning(RuntimeWarning): + firstCat = self.strIndex.union(self.dateIndex) + secondCat = self.strIndex.union(self.strIndex) + + if self.dateIndex.dtype == np.object_: + appended = np.append(self.strIndex, self.dateIndex) + else: + appended = np.append(self.strIndex, self.dateIndex.astype('O')) + + self.assertTrue(tm.equalContents(firstCat, appended)) + self.assertTrue(tm.equalContents(secondCat, self.strIndex)) + tm.assert_contains_all(self.strIndex, firstCat) + tm.assert_contains_all(self.strIndex, secondCat) + tm.assert_contains_all(self.dateIndex, firstCat) + + # test add and radd + idx = Index(list('abc')) + expected = Index(['a1', 'b1', 'c1']) + self.assert_index_equal(idx + '1', expected) + expected = Index(['1a', '1b', '1c']) + self.assert_index_equal('1' + idx, expected) + + def test_append_multiple(self): + index = Index(['a', 'b', 'c', 'd', 'e', 'f']) + + foos = [index[:2], index[2:4], index[4:]] + result = foos[0].append(foos[1:]) + self.assertTrue(result.equals(index)) + + # empty + result = index.append([]) + self.assertTrue(result.equals(index)) + + def test_append_empty_preserve_name(self): + left = Index([], name='foo') + right = 
Index([1, 2, 3], name='foo') + + result = left.append(right) + self.assertEqual(result.name, 'foo') + + left = Index([], name='foo') + right = Index([1, 2, 3], name='bar') + + result = left.append(right) + self.assertIsNone(result.name) + + def test_add_string(self): + # from bug report + index = Index(['a', 'b', 'c']) + index2 = index + 'foo' + + self.assertNotIn('a', index2) + self.assertIn('afoo', index2) + + def test_iadd_string(self): + index = pd.Index(['a', 'b', 'c']) + # doesn't fail test unless there is a check before `+=` + self.assertIn('a', index) + + index += '_x' + self.assertIn('a_x', index) + + def test_difference(self): + + first = self.strIndex[5:20] + second = self.strIndex[:10] + answer = self.strIndex[10:20] + first.name = 'name' + # different names + result = first.difference(second) + + self.assertTrue(tm.equalContents(result, answer)) + self.assertEqual(result.name, None) + + # same names + second.name = 'name' + result = first.difference(second) + self.assertEqual(result.name, 'name') + + # with empty + result = first.difference([]) + self.assertTrue(tm.equalContents(result, first)) + self.assertEqual(result.name, first.name) + + # with everythin + result = first.difference(first) + self.assertEqual(len(result), 0) + self.assertEqual(result.name, first.name) + + def test_symmetric_diff(self): + # smoke + idx1 = Index([1, 2, 3, 4], name='idx1') + idx2 = Index([2, 3, 4, 5]) + result = idx1.sym_diff(idx2) + expected = Index([1, 5]) + self.assertTrue(tm.equalContents(result, expected)) + self.assertIsNone(result.name) + + # __xor__ syntax + expected = idx1 ^ idx2 + self.assertTrue(tm.equalContents(result, expected)) + self.assertIsNone(result.name) + + # multiIndex + idx1 = MultiIndex.from_tuples(self.tuples) + idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) + result = idx1.sym_diff(idx2) + expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) + self.assertTrue(tm.equalContents(result, expected)) + + # nans: + # GH 
#6444, sorting of nans. Make sure the number of nans is right + # and the correct non-nan values are there. punt on sorting. + idx1 = Index([1, 2, 3, np.nan]) + idx2 = Index([0, 1, np.nan]) + result = idx1.sym_diff(idx2) + # expected = Index([0.0, np.nan, 2.0, 3.0, np.nan]) + + nans = pd.isnull(result) + self.assertEqual(nans.sum(), 1) + self.assertEqual((~nans).sum(), 3) + [self.assertIn(x, result) for x in [0.0, 2.0, 3.0]] + + # other not an Index: + idx1 = Index([1, 2, 3, 4], name='idx1') + idx2 = np.array([2, 3, 4, 5]) + expected = Index([1, 5]) + result = idx1.sym_diff(idx2) + self.assertTrue(tm.equalContents(result, expected)) + self.assertEqual(result.name, 'idx1') + + result = idx1.sym_diff(idx2, result_name='new_name') + self.assertTrue(tm.equalContents(result, expected)) + self.assertEqual(result.name, 'new_name') + + def test_is_numeric(self): + self.assertFalse(self.dateIndex.is_numeric()) + self.assertFalse(self.strIndex.is_numeric()) + self.assertTrue(self.intIndex.is_numeric()) + self.assertTrue(self.floatIndex.is_numeric()) + self.assertFalse(self.catIndex.is_numeric()) + + def test_is_object(self): + self.assertTrue(self.strIndex.is_object()) + self.assertTrue(self.boolIndex.is_object()) + self.assertFalse(self.catIndex.is_object()) + self.assertFalse(self.intIndex.is_object()) + self.assertFalse(self.dateIndex.is_object()) + self.assertFalse(self.floatIndex.is_object()) + + def test_is_all_dates(self): + self.assertTrue(self.dateIndex.is_all_dates) + self.assertFalse(self.strIndex.is_all_dates) + self.assertFalse(self.intIndex.is_all_dates) + + def test_summary(self): + self._check_method_works(Index.summary) + # GH3869 + ind = Index(['{other}%s', "~:{range}:0"], name='A') + result = ind.summary() + # shouldn't be formatted accidentally. 
+ self.assertIn('~:{range}:0', result) + self.assertIn('{other}%s', result) + + def test_format(self): + self._check_method_works(Index.format) + + index = Index([datetime.now()]) + + # windows has different precision on datetime.datetime.now (it doesn't + # include us since the default for Timestamp shows these but Index + # formating does not we are skipping + if not is_platform_windows(): + formatted = index.format() + expected = [str(index[0])] + self.assertEqual(formatted, expected) + + # 2845 + index = Index([1, 2.0 + 3.0j, np.nan]) + formatted = index.format() + expected = [str(index[0]), str(index[1]), u('NaN')] + self.assertEqual(formatted, expected) + + # is this really allowed? + index = Index([1, 2.0 + 3.0j, None]) + formatted = index.format() + expected = [str(index[0]), str(index[1]), u('NaN')] + self.assertEqual(formatted, expected) + + self.strIndex[:0].format() + + def test_format_with_name_time_info(self): + # bug I fixed 12/20/2011 + inc = timedelta(hours=4) + dates = Index([dt + inc for dt in self.dateIndex], name='something') + + formatted = dates.format(name=True) + self.assertEqual(formatted[0], 'something') + + def test_format_datetime_with_time(self): + t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) + + result = t.format() + expected = ['2012-02-07 00:00:00', '2012-02-07 23:00:00'] + self.assertEqual(len(result), 2) + self.assertEqual(result, expected) + + def test_format_none(self): + values = ['a', 'b', 'c', None] + + idx = Index(values) + idx.format() + self.assertIsNone(idx[3]) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def _check_method_works(self, method): + method(self.empty) + method(self.dateIndex) + method(self.unicodeIndex) + method(self.strIndex) + method(self.intIndex) + method(self.tuples) + method(self.catIndex) + + def test_get_indexer(self): + idx1 = Index([1, 2, 3, 4, 5]) + idx2 = Index([2, 
4, 6]) + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, [1, 3, -1]) + + r1 = idx2.get_indexer(idx1, method='pad') + e1 = [-1, 0, 0, 1, 1] + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='pad') + assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method='backfill') + e1 = [0, 0, 1, 1, 2] + assert_almost_equal(r1, e1) + + rbfill1 = idx2.get_indexer(idx1, method='bfill') + assert_almost_equal(r1, rbfill1) + + r2 = idx2.get_indexer(idx1[::-1], method='backfill') + assert_almost_equal(r2, e1[::-1]) + + def test_get_indexer_invalid(self): + # GH10411 + idx = Index(np.arange(10)) + + with tm.assertRaisesRegexp(ValueError, 'tolerance argument'): + idx.get_indexer([1, 0], tolerance=1) + + with tm.assertRaisesRegexp(ValueError, 'limit argument'): + idx.get_indexer([1, 0], limit=1) + + def test_get_indexer_nearest(self): + idx = Index(np.arange(10)) + + all_methods = ['pad', 'backfill', 'nearest'] + for method in all_methods: + actual = idx.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, [0, 5, 9]) + + actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) + tm.assert_numpy_array_equal(actual, [0, 5, 9]) + + for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], [0, 2, + 9]]): + actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, expected) + + actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, + tolerance=1) + tm.assert_numpy_array_equal(actual, expected) + + for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], + [0, 2, -1]]): + actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, + tolerance=0.2) + tm.assert_numpy_array_equal(actual, expected) + + with tm.assertRaisesRegexp(ValueError, 'limit argument'): + idx.get_indexer([1, 0], method='nearest', limit=1) + + def test_get_indexer_nearest_decreasing(self): + idx = 
Index(np.arange(10))[::-1] + + all_methods = ['pad', 'backfill', 'nearest'] + for method in all_methods: + actual = idx.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, [9, 4, 0]) + + for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1], [9, 7, + 0]]): + actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings(self): + idx = pd.Index(['b', 'c']) + + actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='pad') + expected = [-1, 0, 1, 1] + tm.assert_numpy_array_equal(actual, expected) + + actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='backfill') + expected = [0, 0, 1, -1] + tm.assert_numpy_array_equal(actual, expected) + + with tm.assertRaises(TypeError): + idx.get_indexer(['a', 'b', 'c', 'd'], method='nearest') + + with tm.assertRaises(TypeError): + idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + + def test_get_loc(self): + idx = pd.Index([0, 1, 2]) + all_methods = [None, 'pad', 'backfill', 'nearest'] + for method in all_methods: + self.assertEqual(idx.get_loc(1, method=method), 1) + if method is not None: + self.assertEqual(idx.get_loc(1, method=method, tolerance=0), 1) + with tm.assertRaises(TypeError): + idx.get_loc([1, 2], method=method) + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc(1.1, method), loc) + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc(1.1, method, tolerance=1), loc) + + for method in ['pad', 'backfill', 'nearest']: + with tm.assertRaises(KeyError): + idx.get_loc(1.1, method, tolerance=0.05) + + with tm.assertRaisesRegexp(ValueError, 'must be numeric'): + idx.get_loc(1.1, 'nearest', tolerance='invalid') + with tm.assertRaisesRegexp(ValueError, 'tolerance .* valid if'): + idx.get_loc(1.1, tolerance=1) + + idx = pd.Index(['a', 'c']) + with tm.assertRaises(TypeError): + idx.get_loc('a', method='nearest') 
+ with tm.assertRaises(TypeError): + idx.get_loc('a', method='pad', tolerance='invalid') + + def test_slice_locs(self): + for dtype in [int, float]: + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(idx) + + self.assertEqual(idx.slice_locs(start=2), (2, n)) + self.assertEqual(idx.slice_locs(start=3), (3, n)) + self.assertEqual(idx.slice_locs(3, 8), (3, 6)) + self.assertEqual(idx.slice_locs(5, 10), (3, n)) + self.assertEqual(idx.slice_locs(end=8), (0, 6)) + self.assertEqual(idx.slice_locs(end=9), (0, 7)) + + # reversed + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(8, 2), (2, 6)) + self.assertEqual(idx2.slice_locs(7, 3), (2, 5)) + + # float slicing + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float)) + n = len(idx) + self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) + self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) + self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + + # int slicing with floats + idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int)) + self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) + self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) + self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) + + def test_slice_locs_dup(self): + idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) + self.assertEqual(idx.slice_locs('a', 'd'), (0, 6)) + self.assertEqual(idx.slice_locs(end='d'), (0, 6)) + self.assertEqual(idx.slice_locs('a', 'c'), (0, 4)) + self.assertEqual(idx.slice_locs('b', 'd'), (2, 6)) + + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs('d', 'a'), (0, 6)) + self.assertEqual(idx2.slice_locs(end='a'), (0, 6)) + self.assertEqual(idx2.slice_locs('d', 'b'), (0, 4)) + self.assertEqual(idx2.slice_locs('c', 'a'), (2, 6)) + + for dtype in [int, float]: + idx = Index(np.array([10, 12, 12, 14], dtype=dtype)) + self.assertEqual(idx.slice_locs(12, 12), (1, 3)) + 
self.assertEqual(idx.slice_locs(11, 13), (1, 3)) + + idx2 = idx[::-1] + self.assertEqual(idx2.slice_locs(12, 12), (1, 3)) + self.assertEqual(idx2.slice_locs(13, 11), (1, 3)) + + def test_slice_locs_na(self): + idx = Index([np.nan, 1, 2]) + self.assertRaises(KeyError, idx.slice_locs, start=1.5) + self.assertRaises(KeyError, idx.slice_locs, end=1.5) + self.assertEqual(idx.slice_locs(1), (1, 3)) + self.assertEqual(idx.slice_locs(np.nan), (0, 3)) + + idx = Index([0, np.nan, np.nan, 1, 2]) + self.assertEqual(idx.slice_locs(np.nan), (1, 5)) + + def test_slice_locs_negative_step(self): + idx = Index(list('bcdxy')) + + SLC = pd.IndexSlice + + def check_slice(in_slice, expected): + s_start, s_stop = idx.slice_locs(in_slice.start, in_slice.stop, + in_slice.step) + result = idx[s_start:s_stop:in_slice.step] + expected = pd.Index(list(expected)) + self.assertTrue(result.equals(expected)) + + for in_slice, expected in [ + (SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), + (SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'), + (SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'), + (SLC['y'::-4], 'yb'), + # absent labels + (SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'), + (SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'), + (SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'), + (SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''), + (SLC['m':'m':-1], '') + ]: + check_slice(in_slice, expected) + + def test_drop(self): + n = len(self.strIndex) + + drop = self.strIndex[lrange(5, 10)] + dropped = self.strIndex.drop(drop) + expected = self.strIndex[lrange(5) + lrange(10, n)] + self.assertTrue(dropped.equals(expected)) + + self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) + self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar']) + + # errors='ignore' + mixed = drop.tolist() + ['foo'] + dropped = self.strIndex.drop(mixed, errors='ignore') + expected = self.strIndex[lrange(5) + lrange(10, n)] + self.assert_index_equal(dropped, expected) + + dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore') 
+ expected = self.strIndex[lrange(n)] + self.assert_index_equal(dropped, expected) + + dropped = self.strIndex.drop(self.strIndex[0]) + expected = self.strIndex[1:] + self.assert_index_equal(dropped, expected) + + ser = Index([1, 2, 3]) + dropped = ser.drop(1) + expected = Index([2, 3]) + self.assert_index_equal(dropped, expected) + + # errors='ignore' + self.assertRaises(ValueError, ser.drop, [3, 4]) + + dropped = ser.drop(4, errors='ignore') + expected = Index([1, 2, 3]) + self.assert_index_equal(dropped, expected) + + dropped = ser.drop([3, 4, 5], errors='ignore') + expected = Index([1, 2]) + self.assert_index_equal(dropped, expected) + + def test_tuple_union_bug(self): + import pandas + import numpy as np + + aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], + dtype=[('num', int), ('let', 'a1')]) + aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), + (2, 'B'), (1, 'C'), (2, 'C')], + dtype=[('num', int), ('let', 'a1')]) + + idx1 = pandas.Index(aidx1) + idx2 = pandas.Index(aidx2) + + # intersection broken? 
+ int_idx = idx1.intersection(idx2) + # needs to be 1d like idx1 and idx2 + expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2))) + self.assertEqual(int_idx.ndim, 1) + self.assertTrue(int_idx.equals(expected)) + + # union broken + union_idx = idx1.union(idx2) + expected = idx2 + self.assertEqual(union_idx.ndim, 1) + self.assertTrue(union_idx.equals(expected)) + + def test_is_monotonic_incomparable(self): + index = Index([5, datetime.now(), 7]) + self.assertFalse(index.is_monotonic) + self.assertFalse(index.is_monotonic_decreasing) + + def test_get_set_value(self): + values = np.random.randn(100) + date = self.dateIndex[67] + + assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) + + self.dateIndex.set_value(values, date, 10) + self.assertEqual(values[67], 10) + + def test_isin(self): + values = ['foo', 'bar', 'quux'] + + idx = Index(['qux', 'baz', 'foo', 'bar']) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = Index([]) + result = idx.isin(values) + self.assertEqual(len(result), 0) + self.assertEqual(result.dtype, np.bool_) + + def test_isin_nan(self): + tm.assert_numpy_array_equal( + Index(['a', np.nan]).isin([np.nan]), [False, True]) + tm.assert_numpy_array_equal( + Index(['a', pd.NaT]).isin([pd.NaT]), [False, True]) + tm.assert_numpy_array_equal( + Index(['a', np.nan]).isin([float('nan')]), [False, False]) + tm.assert_numpy_array_equal( + Index(['a', np.nan]).isin([pd.NaT]), [False, False]) + # Float64Index overrides isin, so must be checked separately + tm.assert_numpy_array_equal( + Float64Index([1.0, np.nan]).isin([np.nan]), [False, True]) + tm.assert_numpy_array_equal( + Float64Index([1.0, np.nan]).isin([float('nan')]), [False, True]) + tm.assert_numpy_array_equal( + Float64Index([1.0, np.nan]).isin([pd.NaT]), [False, True]) + + def test_isin_level_kwarg(self): + def check_idx(idx): + values = idx.tolist()[-2:] + 
['nonexisting'] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(values, level=0)) + tm.assert_numpy_array_equal(expected, idx.isin(values, level=-1)) + + self.assertRaises(IndexError, idx.isin, values, level=1) + self.assertRaises(IndexError, idx.isin, values, level=10) + self.assertRaises(IndexError, idx.isin, values, level=-2) + + self.assertRaises(KeyError, idx.isin, values, level=1.0) + self.assertRaises(KeyError, idx.isin, values, level='foobar') + + idx.name = 'foobar' + tm.assert_numpy_array_equal(expected, + idx.isin(values, level='foobar')) + + self.assertRaises(KeyError, idx.isin, values, level='xyzzy') + self.assertRaises(KeyError, idx.isin, values, level=np.nan) + + check_idx(Index(['qux', 'baz', 'foo', 'bar'])) + # Float64Index overrides isin, so must be checked separately + check_idx(Float64Index([1.0, 2.0, 3.0, 4.0])) + + def test_boolean_cmp(self): + values = [1, 2, 3, 4] + + idx = Index(values) + res = (idx == values) + + tm.assert_numpy_array_equal(res, np.array( + [True, True, True, True], dtype=bool)) + + def test_get_level_values(self): + result = self.strIndex.get_level_values(0) + self.assertTrue(result.equals(self.strIndex)) + + def test_slice_keep_name(self): + idx = Index(['a', 'b'], name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_join_self(self): + # instance attributes of the form self.Index + indices = 'unicode', 'str', 'date', 'int', 'float' + kinds = 'outer', 'inner', 'left', 'right' + for index_kind in indices: + res = getattr(self, '{0}Index'.format(index_kind)) + + for kind in kinds: + joined = res.join(res, how=kind) + self.assertIs(res, joined) + + def test_str_attribute(self): + # GH9068 + methods = ['strip', 'rstrip', 'lstrip'] + idx = Index([' jack', 'jill ', ' jesse ', 'frank']) + for method in methods: + expected = Index([getattr(str, method)(x) for x in idx.values]) + tm.assert_index_equal( + getattr(Index.str, method)(idx.str), expected) + + # create 
a few instances that are not able to use .str accessor + indices = [Index(range(5)), tm.makeDateIndex(10), + MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), + PeriodIndex(start='2000', end='2010', freq='A')] + for idx in indices: + with self.assertRaisesRegexp(AttributeError, + 'only use .str accessor'): + idx.str.repeat(2) + + idx = Index(['a b c', 'd e', 'f']) + expected = Index([['a', 'b', 'c'], ['d', 'e'], ['f']]) + tm.assert_index_equal(idx.str.split(), expected) + tm.assert_index_equal(idx.str.split(expand=False), expected) + + expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan), + ('f', np.nan, np.nan)]) + tm.assert_index_equal(idx.str.split(expand=True), expected) + + # test boolean case, should return np.array instead of boolean Index + idx = Index(['a1', 'a2', 'b1', 'b2']) + expected = np.array([True, True, False, False]) + tm.assert_numpy_array_equal(idx.str.startswith('a'), expected) + self.assertIsInstance(idx.str.startswith('a'), np.ndarray) + s = Series(range(4), index=idx) + expected = Series(range(2), index=['a1', 'a2']) + tm.assert_series_equal(s[s.index.str.startswith('a')], expected) + + def test_tab_completion(self): + # GH 9910 + idx = Index(list('abcd')) + self.assertTrue('str' in dir(idx)) + + idx = Index(range(4)) + self.assertTrue('str' not in dir(idx)) + + def test_indexing_doesnt_change_class(self): + idx = Index([1, 2, 3, 'a', 'b', 'c']) + + self.assertTrue(idx[1:3].identical(pd.Index([2, 3], dtype=np.object_))) + self.assertTrue(idx[[0, 1]].identical(pd.Index( + [1, 2], dtype=np.object_))) + + def test_outer_join_sort(self): + left_idx = Index(np.random.permutation(15)) + right_idx = tm.makeDateIndex(10) + + with tm.assert_produces_warning(RuntimeWarning): + joined = left_idx.join(right_idx, how='outer') + + # right_idx in this case because DatetimeIndex has join precedence over + # Int64Index + with tm.assert_produces_warning(RuntimeWarning): + expected = right_idx.astype(object).union(left_idx.astype(object)) 
+ tm.assert_index_equal(joined, expected) + + def test_nan_first_take_datetime(self): + idx = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')]) + res = idx.take([-1, 0, 1]) + exp = Index([idx[-1], idx[0], idx[1]]) + tm.assert_index_equal(res, exp) + + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): + # GH6552 + idx = pd.Index([0, 1, 2]) + + dt_idx = pd.date_range('20130101', periods=3) + + idx.name = None + self.assertEqual(idx.reindex([])[0].name, None) + self.assertEqual(idx.reindex(np.array([]))[0].name, None) + self.assertEqual(idx.reindex(idx.tolist())[0].name, None) + self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None) + self.assertEqual(idx.reindex(idx.values)[0].name, None) + self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None) + + # Must preserve name even if dtype changes. + self.assertEqual(idx.reindex(dt_idx.values)[0].name, None) + self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None) + + idx.name = 'foobar' + self.assertEqual(idx.reindex([])[0].name, 'foobar') + self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar') + + # Must preserve name even if dtype changes. 
+ self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar') + self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar') + + def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): + # GH7774 + idx = pd.Index(list('abc')) + + def get_reindex_type(target): + return idx.reindex(target)[0].dtype.type + + self.assertEqual(get_reindex_type([]), np.object_) + self.assertEqual(get_reindex_type(np.array([])), np.object_) + self.assertEqual(get_reindex_type(np.array([], dtype=np.int64)), + np.object_) + + def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): + # GH7774 + idx = pd.Index(list('abc')) + + def get_reindex_type(target): + return idx.reindex(target)[0].dtype.type + + self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int64) + self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float64) + self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64) + + reindexed = idx.reindex(pd.MultiIndex( + [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0] + self.assertEqual(reindexed.levels[0].dtype.type, np.int64) + self.assertEqual(reindexed.levels[1].dtype.type, np.float64) + + def test_groupby(self): + idx = Index(range(5)) + groups = idx.groupby(np.array([1, 1, 2, 2, 2])) + exp = {1: [0, 1], 2: [2, 3, 4]} + tm.assert_dict_equal(groups, exp) + + def test_equals_op_multiindex(self): + # GH9785 + # test comparisons of multiindex + from pandas.compat import StringIO + df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1]) + tm.assert_numpy_array_equal(df.index == df.index, + np.array([True, True])) + + mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)]) + tm.assert_numpy_array_equal(df.index == mi1, np.array([True, True])) + mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)]) + tm.assert_numpy_array_equal(df.index == mi2, np.array([True, False])) + mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + df.index == mi3 + + index_a = 
Index(['foo', 'bar', 'baz']) + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + df.index == index_a + tm.assert_numpy_array_equal(index_a == mi3, + np.array([False, False, False])) + + def test_conversion_preserves_name(self): + # GH 10875 + i = pd.Index(['01:02:03', '01:02:04'], name='label') + self.assertEqual(i.name, pd.to_datetime(i).name) + self.assertEqual(i.name, pd.to_timedelta(i).name) + + def test_string_index_repr(self): + # py3/py2 repr can differ because of "u" prefix + # which also affects to displayed element size + + # short + idx = pd.Index(['a', 'bb', 'ccc']) + if PY3: + expected = u"""Index(['a', 'bb', 'ccc'], dtype='object')""" + self.assertEqual(repr(idx), expected) + else: + expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')""" + self.assertEqual(unicode(idx), expected) + + # multiple lines + idx = pd.Index(['a', 'bb', 'ccc'] * 10) + if PY3: + expected = u"""\ +Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', + 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', + 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + dtype='object')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""\ +Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', + u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', + u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], + dtype='object')""" + + self.assertEqual(unicode(idx), expected) + + # truncated + idx = pd.Index(['a', 'bb', 'ccc'] * 100) + if PY3: + expected = u"""\ +Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + ... + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + dtype='object', length=300)""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""\ +Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', + ... 
+ u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], + dtype='object', length=300)""" + + self.assertEqual(unicode(idx), expected) + + # short + idx = pd.Index([u'あ', u'いい', u'ううう']) + if PY3: + expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" + self.assertEqual(repr(idx), expected) + else: + expected = u"""\ +Index([u'あ', u'いい', u'ううう'], dtype='object')""" + self.assertEqual(unicode(idx), expected) + + # multiple lines + idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) + if PY3: + expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + dtype='object')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], + dtype='object')""" + + self.assertEqual(unicode(idx), expected) + + # truncated + idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) + if PY3: + expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + ... + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + dtype='object', length=300)""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + ... 
+ u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], + dtype='object', length=300)""" + + self.assertEqual(unicode(idx), expected) + + # Emable Unicode option ----------------------------------------- + with cf.option_context('display.unicode.east_asian_width', True): + + # short + idx = pd.Index([u'あ', u'いい', u'ううう']) + if PY3: + expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" + self.assertEqual(repr(idx), expected) + else: + expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" + self.assertEqual(unicode(idx), expected) + + # multiple lines + idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) + if PY3: + expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう'], + dtype='object')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], + dtype='object')""" + + self.assertEqual(unicode(idx), expected) + + # truncated + idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) + if PY3: + expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', + ... + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう'], + dtype='object', length=300)""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', + ... 
+ u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう'], + dtype='object', length=300)""" + + self.assertEqual(unicode(idx), expected) + + +def test_get_combined_index(): + from pandas.core.index import _get_combined_index + result = _get_combined_index([]) + assert (result.equals(Index([]))) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py new file mode 100644 index 0000000000000..4ec2f37ba15fb --- /dev/null +++ b/pandas/tests/indexes/test_category.py @@ -0,0 +1,662 @@ +# TODO(wesm): fix long line flake8 issues +# flake8: noqa + +import pandas.util.testing as tm +from pandas.indexes.api import Index, CategoricalIndex +from .common import Base + +from pandas.compat import range, PY3 + +import numpy as np + +from pandas import Categorical, compat +from pandas.util.testing import assert_almost_equal +import pandas.core.config as cf +import pandas as pd + +if PY3: + unicode = lambda x: x + + +class TestCategoricalIndex(Base, tm.TestCase): + _holder = CategoricalIndex + + def setUp(self): + self.indices = dict(catIndex=tm.makeCategoricalIndex(100)) + self.setup_indices() + + def create_index(self, categories=None, ordered=False): + if categories is None: + categories = list('cab') + return CategoricalIndex( + list('aabbca'), categories=categories, ordered=ordered) + + def test_construction(self): + + ci = self.create_index(categories=list('abcd')) + categories = ci.categories + + result = Index(ci) + tm.assert_index_equal(result, ci, exact=True) + self.assertFalse(result.ordered) + + result = Index(ci.values) + tm.assert_index_equal(result, ci, exact=True) + self.assertFalse(result.ordered) + + # empty + result = CategoricalIndex(categories=categories) + self.assertTrue(result.categories.equals(Index(categories))) + tm.assert_numpy_array_equal(result.codes, np.array([], dtype='int8')) + self.assertFalse(result.ordered) + + # passing categories + result = CategoricalIndex(list('aabbca'), categories=categories) 
+ self.assertTrue(result.categories.equals(Index(categories))) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) + + c = pd.Categorical(list('aabbca')) + result = CategoricalIndex(c) + self.assertTrue(result.categories.equals(Index(list('abc')))) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) + self.assertFalse(result.ordered) + + result = CategoricalIndex(c, categories=categories) + self.assertTrue(result.categories.equals(Index(categories))) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) + self.assertFalse(result.ordered) + + ci = CategoricalIndex(c, categories=list('abcd')) + result = CategoricalIndex(ci) + self.assertTrue(result.categories.equals(Index(categories))) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) + self.assertFalse(result.ordered) + + result = CategoricalIndex(ci, categories=list('ab')) + self.assertTrue(result.categories.equals(Index(list('ab')))) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, -1, 0], dtype='int8')) + self.assertFalse(result.ordered) + + result = CategoricalIndex(ci, categories=list('ab'), ordered=True) + self.assertTrue(result.categories.equals(Index(list('ab')))) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, -1, 0], dtype='int8')) + self.assertTrue(result.ordered) + + # turn me to an Index + result = Index(np.array(ci)) + self.assertIsInstance(result, Index) + self.assertNotIsInstance(result, CategoricalIndex) + + def test_construction_with_dtype(self): + + # specify dtype + ci = self.create_index(categories=list('abc')) + + result = Index(np.array(ci), dtype='category') + tm.assert_index_equal(result, ci, exact=True) + + result = Index(np.array(ci).tolist(), dtype='category') + tm.assert_index_equal(result, ci, exact=True) + + # these are generally only equal when the categories are reordered + ci = 
self.create_index() + + result = Index( + np.array(ci), dtype='category').reorder_categories(ci.categories) + tm.assert_index_equal(result, ci, exact=True) + + # make sure indexes are handled + expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2], + ordered=True) + idx = Index(range(3)) + result = CategoricalIndex(idx, categories=idx, ordered=True) + tm.assert_index_equal(result, expected, exact=True) + + def test_disallow_set_ops(self): + + # GH 10039 + # set ops (+/-) raise TypeError + idx = pd.Index(pd.Categorical(['a', 'b'])) + + self.assertRaises(TypeError, lambda: idx - idx) + self.assertRaises(TypeError, lambda: idx + idx) + self.assertRaises(TypeError, lambda: idx - ['a', 'b']) + self.assertRaises(TypeError, lambda: idx + ['a', 'b']) + self.assertRaises(TypeError, lambda: ['a', 'b'] - idx) + self.assertRaises(TypeError, lambda: ['a', 'b'] + idx) + + def test_method_delegation(self): + + ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) + result = ci.set_categories(list('cab')) + tm.assert_index_equal(result, CategoricalIndex( + list('aabbca'), categories=list('cab'))) + + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) + result = ci.rename_categories(list('efg')) + tm.assert_index_equal(result, CategoricalIndex( + list('ffggef'), categories=list('efg'))) + + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) + result = ci.add_categories(['d']) + tm.assert_index_equal(result, CategoricalIndex( + list('aabbca'), categories=list('cabd'))) + + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) + result = ci.remove_categories(['c']) + tm.assert_index_equal(result, CategoricalIndex( + list('aabb') + [np.nan] + ['a'], categories=list('ab'))) + + ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) + result = ci.as_unordered() + tm.assert_index_equal(result, ci) + + ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) + result = ci.as_ordered() + tm.assert_index_equal(result, 
CategoricalIndex( + list('aabbca'), categories=list('cabdef'), ordered=True)) + + # invalid + self.assertRaises(ValueError, lambda: ci.set_categories( + list('cab'), inplace=True)) + + def test_contains(self): + + ci = self.create_index(categories=list('cabdef')) + + self.assertTrue('a' in ci) + self.assertTrue('z' not in ci) + self.assertTrue('e' not in ci) + self.assertTrue(np.nan not in ci) + + # assert codes NOT in index + self.assertFalse(0 in ci) + self.assertFalse(1 in ci) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + ci = CategoricalIndex( + list('aabbca'), categories=list('cabdef') + [np.nan]) + self.assertFalse(np.nan in ci) + + ci = CategoricalIndex( + list('aabbca') + [np.nan], categories=list('cabdef')) + self.assertTrue(np.nan in ci) + + def test_min_max(self): + + ci = self.create_index(ordered=False) + self.assertRaises(TypeError, lambda: ci.min()) + self.assertRaises(TypeError, lambda: ci.max()) + + ci = self.create_index(ordered=True) + + self.assertEqual(ci.min(), 'c') + self.assertEqual(ci.max(), 'b') + + def test_append(self): + + ci = self.create_index() + categories = ci.categories + + # append cats with the same categories + result = ci[:3].append(ci[3:]) + tm.assert_index_equal(result, ci, exact=True) + + foos = [ci[:1], ci[1:3], ci[3:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, ci, exact=True) + + # empty + result = ci.append([]) + tm.assert_index_equal(result, ci, exact=True) + + # appending with different categories or reordered is not ok + self.assertRaises( + TypeError, + lambda: ci.append(ci.values.set_categories(list('abcd')))) + self.assertRaises( + TypeError, + lambda: ci.append(ci.values.reorder_categories(list('abc')))) + + # with objects + result = ci.append(['c', 'a']) + expected = CategoricalIndex(list('aabbcaca'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # invalid objects + self.assertRaises(TypeError, lambda: ci.append(['a',
'd'])) + + def test_insert(self): + + ci = self.create_index() + categories = ci.categories + + # test 0th element + result = ci.insert(0, 'a') + expected = CategoricalIndex(list('aaabbca'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test Nth element that follows Python list behavior + result = ci.insert(-1, 'a') + expected = CategoricalIndex(list('aabbcaa'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test empty + result = CategoricalIndex(categories=categories).insert(0, 'a') + expected = CategoricalIndex(['a'], categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # invalid + self.assertRaises(TypeError, lambda: ci.insert(0, 'd')) + + def test_delete(self): + + ci = self.create_index() + categories = ci.categories + + result = ci.delete(0) + expected = CategoricalIndex(list('abbca'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + result = ci.delete(-1) + expected = CategoricalIndex(list('aabbc'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + with tm.assertRaises((IndexError, ValueError)): + # either depending on numpy version + result = ci.delete(10) + + def test_astype(self): + + ci = self.create_index() + result = ci.astype('category') + tm.assert_index_equal(result, ci, exact=True) + + result = ci.astype(object) + self.assertTrue(result.equals(Index(np.array(ci)))) + + # this IS equal, but not the same class + self.assertTrue(result.equals(ci)) + self.assertIsInstance(result, Index) + self.assertNotIsInstance(result, CategoricalIndex) + + def test_reindex_base(self): + + # determined by cat ordering + idx = self.create_index() + expected = np.array([4, 0, 1, 5, 2, 3]) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): + idx.get_indexer(idx, method='invalid') + + def test_reindexing(self): + 
+ ci = self.create_index() + oidx = Index(np.array(ci)) + + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + def test_reindex_dtype(self): + res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c' + ]) + tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) + + res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex( + Categorical(['a', 'c'])) + tm.assert_index_equal(res, CategoricalIndex( + ['a', 'a', 'c'], categories=['a', 'c']), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) + + res, indexer = CategoricalIndex( + ['a', 'b', 'c', 'a' + ], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c']) + tm.assert_index_equal(res, Index( + ['a', 'a', 'c'], dtype='object'), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) + + res, indexer = CategoricalIndex( + ['a', 'b', 'c', 'a'], + categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c'])) + tm.assert_index_equal(res, CategoricalIndex( + ['a', 'a', 'c'], categories=['a', 'c']), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) + + def test_duplicates(self): + + idx = CategoricalIndex([0, 0, 0], name='foo') + self.assertFalse(idx.is_unique) + self.assertTrue(idx.has_duplicates) + + expected = CategoricalIndex([0], name='foo') + self.assert_index_equal(idx.drop_duplicates(), expected) + + def test_get_indexer(self): + + idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc')) + idx2 = CategoricalIndex(list('abf')) + + for indexer in [idx2, list('abf'), Index(list('abf'))]: + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, [0, 1, 2, -1]) + + self.assertRaises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='pad')) + self.assertRaises(NotImplementedError, + 
lambda: idx2.get_indexer(idx1, method='backfill')) + self.assertRaises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='nearest')) + + def test_repr_roundtrip(self): + + ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) + str(ci) + tm.assert_index_equal(eval(repr(ci)), ci, exact=True) + + # formatting + if PY3: + str(ci) + else: + compat.text_type(ci) + + # long format + # this is not reprable + ci = CategoricalIndex(np.random.randint(0, 5, size=100)) + if PY3: + str(ci) + else: + compat.text_type(ci) + + def test_isin(self): + + ci = CategoricalIndex( + list('aabca') + [np.nan], categories=['c', 'a', 'b']) + tm.assert_numpy_array_equal( + ci.isin(['c']), + np.array([False, False, False, True, False, False])) + tm.assert_numpy_array_equal( + ci.isin(['c', 'a', 'b']), np.array([True] * 5 + [False])) + tm.assert_numpy_array_equal( + ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) + + # mismatched categorical -> coerced to ndarray so doesn't matter + tm.assert_numpy_array_equal( + ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * + 6)) + tm.assert_numpy_array_equal( + ci.isin(ci.set_categories(list('defghi'))), + np.array([False] * 5 + [True])) + + def test_identical(self): + + ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) + ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], + ordered=True) + self.assertTrue(ci1.identical(ci1)) + self.assertTrue(ci1.identical(ci1.copy())) + self.assertFalse(ci1.identical(ci2)) + + def test_equals(self): + + ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) + ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], + ordered=True) + + self.assertTrue(ci1.equals(ci1)) + self.assertFalse(ci1.equals(ci2)) + self.assertTrue(ci1.equals(ci1.astype(object))) + self.assertTrue(ci1.astype(object).equals(ci1)) + + self.assertTrue((ci1 == ci1).all()) + self.assertFalse((ci1 != ci1).all()) + self.assertFalse((ci1 > ci1).all()) 
+ self.assertFalse((ci1 < ci1).all()) + self.assertTrue((ci1 <= ci1).all()) + self.assertTrue((ci1 >= ci1).all()) + + self.assertFalse((ci1 == 1).all()) + self.assertTrue((ci1 == Index(['a', 'b'])).all()) + self.assertTrue((ci1 == ci1.values).all()) + + # invalid comparisons + with tm.assertRaisesRegexp(ValueError, "Lengths must match"): + ci1 == Index(['a', 'b', 'c']) + self.assertRaises(TypeError, lambda: ci1 == ci2) + self.assertRaises( + TypeError, lambda: ci1 == Categorical(ci1.values, ordered=False)) + self.assertRaises( + TypeError, + lambda: ci1 == Categorical(ci1.values, categories=list('abc'))) + + # tests + # make sure that we are testing for category inclusion properly + self.assertTrue(CategoricalIndex( + list('aabca'), categories=['c', 'a', 'b']).equals(list('aabca'))) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + self.assertTrue(CategoricalIndex( + list('aabca'), categories=['c', 'a', 'b', np.nan]).equals(list( + 'aabca'))) + + self.assertFalse(CategoricalIndex( + list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( + 'aabca'))) + self.assertTrue(CategoricalIndex( + list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( + 'aabca') + [np.nan])) + + def test_string_categorical_index_repr(self): + # short + idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) + if PY3: + expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) + + # multiple lines + idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 10) + if PY3: + expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + 
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', + u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', + u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', + u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + + self.assertEqual(unicode(idx), expected) + + # truncated + idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 100) + if PY3: + expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + ... + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', + u'ccc', u'a', + ... + u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', + u'bb', u'ccc'], + categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" + + self.assertEqual(unicode(idx), expected) + + # larger categories + idx = pd.CategoricalIndex(list('abcdefghijklmmo')) + if PY3: + expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'm', 'o'], + categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', + u'k', u'l', u'm', u'm', u'o'], + categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" + + self.assertEqual(unicode(idx), expected) + + # short + idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) + if PY3: + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + 
self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) + + # multiple lines + idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) + if PY3: + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', + u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + + self.assertEqual(unicode(idx), expected) + + # truncated + idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) + if PY3: + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + ... + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', + ... 
+ u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう'], + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + + self.assertEqual(unicode(idx), expected) + + # larger categories + idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) + if PY3: + expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', + 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', + u'さ', u'し', u'す', u'せ', u'そ'], + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + + self.assertEqual(unicode(idx), expected) + + # Enable Unicode option ----------------------------------------- + with cf.option_context('display.unicode.east_asian_width', True): + + # short + idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) + if PY3: + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) + + # multiple lines + idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) + if PY3: + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', 
u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', u'いい', u'ううう'], + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + + self.assertEqual(unicode(idx), expected) + + # truncated + idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) + if PY3: + expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', + ... + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', + u'いい', u'ううう', u'あ', + ... + u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', + u'ううう', u'あ', u'いい', u'ううう'], + categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + + self.assertEqual(unicode(idx), expected) + + # larger categories + idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) + if PY3: + expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', + 'さ', 'し', 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + + self.assertEqual(repr(idx), expected) + else: + expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', + u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], + categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + + self.assertEqual(unicode(idx), expected) + + def test_fillna_categorical(self): + # GH 11343 + idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x') + # fill by value in categories + exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x') + self.assert_index_equal(idx.fillna(1.0), exp) + + # fill by value not in categories raises ValueError + with tm.assertRaisesRegexp(ValueError, + 'fill value must be in categories'): + idx.fillna(2.0) diff --git a/pandas/tests/indexes/test_datetimelike.py 
b/pandas/tests/indexes/test_datetimelike.py new file mode 100644 index 0000000000000..de505b93da241 --- /dev/null +++ b/pandas/tests/indexes/test_datetimelike.py @@ -0,0 +1,855 @@ +# -*- coding: utf-8 -*- + +from datetime import timedelta, time + +import numpy as np + +from pandas import (date_range, period_range, + Series, Index, DatetimeIndex, + TimedeltaIndex, PeriodIndex) + +import pandas.util.testing as tm + +import pandas as pd +from pandas.lib import Timestamp + +from .common import Base + + +class DatetimeLike(Base): + + def test_shift_identity(self): + + idx = self.create_index() + self.assert_index_equal(idx, idx.shift(0)) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = 'foo' + self.assertFalse("length=%s" % len(idx) in str(idx)) + self.assertTrue("'foo'" in str(idx)) + self.assertTrue(idx.__class__.__name__ in str(idx)) + + if hasattr(idx, 'tz'): + if idx.tz is not None: + self.assertTrue(idx.tz in str(idx)) + if hasattr(idx, 'freq'): + self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) + + def test_view(self): + super(DatetimeLike, self).test_view() + + i = self.create_index() + + i_view = i.view('i8') + result = self._holder(i) + tm.assert_index_equal(result, i) + + i_view = i.view(self._holder) + result = self._holder(i) + tm.assert_index_equal(result, i_view) + + +class TestDatetimeIndex(DatetimeLike, tm.TestCase): + _holder = DatetimeIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeDateIndex(10)) + self.setup_indices() + + def create_index(self): + return date_range('20130101', periods=5) + + def test_shift(self): + + # test shift for datetimeIndex and non datetimeIndex + # GH8083 + + drange = self.create_index() + result = drange.shift(1) + expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = 
DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D') + expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') + self.assert_index_equal(result, expected) + + def test_construction_with_alt(self): + + i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') + i2 = DatetimeIndex(i, dtype=i.dtype) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) + self.assert_index_equal(i, i2) + + i2 = DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) + self.assert_index_equal(i, i2) + + # localize into the provided tz + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') + expected = i.tz_localize(None).tz_localize('UTC') + self.assert_index_equal(i2, expected) + + i2 = DatetimeIndex(i, tz='UTC') + expected = i.tz_convert('UTC') + self.assert_index_equal(i2, expected) + + # incompat tz/dtype + self.assertRaises(ValueError, lambda: DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) + + def test_pickle_compat_construction(self): + pass + + def test_construction_index_with_mixed_timezones(self): + # GH 11488 + # no tz results in DatetimeIndex + result = Index( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') + 
], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # passing tz results in DatetimeIndex + result = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), + Timestamp('2011-01-03 00:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # length = 1 + result = Index([Timestamp('2011-01-01')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') + 
self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # length = 1 with tz + result = Index( + [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', + name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # GH 11488 + result = Index([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), + pd.NaT, Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # same tz results in DatetimeIndex + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # same tz results in DatetimeIndex (DST) + result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + pd.NaT, + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + # different tz results in Index(dtype=object) + result = Index([pd.NaT, 
Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], + name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], + dtype='object', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertFalse(isinstance(result, DatetimeIndex)) + + # passing tz results in DatetimeIndex + result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'), + pd.NaT, Timestamp('2011-01-03 00:00')], + tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # all NaT + result = Index([pd.NaT, pd.NaT], name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNone(result.tz) + + # all NaT with tz + result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + self.assertIsNotNone(result.tz) + self.assertEqual(result.tz, exp.tz) + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, added explicit tests) + + # no tz results in DatetimeIndex + result = DatetimeIndex( + [Timestamp('2011-01-01'), 
Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') + ], tz='Asia/Tokyo', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), + Timestamp('2011-08-01 10:00', + tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # different tz coerces tz-naive to tz-awareIndex(dtype=object) + result = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), + Timestamp('2011-01-02 10:00')], + tz='US/Eastern', name='idx') + self.assert_index_equal(result, exp, exact=True) + self.assertTrue(isinstance(result, DatetimeIndex)) + + # tz mismatch affecting to tz-aware raises TypeError/ValueError + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + + with tm.assertRaises(TypeError): + DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + + with tm.assertRaises(ValueError): + DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), + Timestamp('2011-01-02 10:00', 
tz='US/Eastern')], + tz='US/Eastern', name='idx') + + def test_get_loc(self): + idx = pd.date_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + if method is not None: + self.assertEqual(idx.get_loc(idx[1], method, + tolerance=pd.Timedelta('0 days')), + 1) + + self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) + + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=np.timedelta64(1, 'D')), 1) + self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') + + self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) + self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) + + self.assertEqual(idx.get_loc('1999', method='nearest'), 0) + self.assertEqual(idx.get_loc('2001', method='nearest'), 2) + + with tm.assertRaises(KeyError): + idx.get_loc('1999', method='pad') + with tm.assertRaises(KeyError): + idx.get_loc('2001', method='backfill') + + with tm.assertRaises(KeyError): + idx.get_loc('foobar') + with tm.assertRaises(TypeError): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(['2000-01-01', '2000-01-04']) + self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) + self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) + 
self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) + + # time indexing + idx = pd.date_range('2000-01-01', periods=24, freq='H') + tm.assert_numpy_array_equal(idx.get_loc(time(12)), [12]) + tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), []) + with tm.assertRaises(NotImplementedError): + idx.get_loc(time(12, 30), method='pad') + + def test_get_indexer(self): + idx = pd.date_range('2000-01-01', periods=3) + tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) + + target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', + '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), + [0, -1, 1]) + with tm.assertRaises(ValueError): + idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') + + def test_roundtrip_pickle_with_tz(self): + + # GH 8367 + # round-trip of timezone + index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') + unpickled = self.round_trip_pickle(index) + self.assertTrue(index.equals(unpickled)) + + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH7774 + index = date_range('20130101', periods=3, tz='US/Eastern') + self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') + self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') + + def test_time_loc(self): # GH8667 + from datetime import time + from pandas.index import _SIZE_CUTOFF + + ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = pd.date_range('2014-11-26', periods=n, freq='S') + ts = pd.Series(np.random.randn(n), index=idx) + i = np.arange(start, n, step) + + 
tm.assert_numpy_array_equal(ts.index.get_loc(key), i) + tm.assert_series_equal(ts[key], ts.iloc[i]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[i] *= -10 + tm.assert_series_equal(left, right) + + def test_time_overflow_for_32bit_machines(self): + # GH8943. On some machines NumPy defaults to np.int32 (for example, + # 32-bit Linux machines). In the function _generate_regular_range + # found in tseries/index.py, `periods` gets multiplied by `strides` + # (which has value 1e9) and since the max value for np.int32 is ~2e9, + # and since those machines won't promote np.int32 to np.int64, we get + # overflow. + periods = np.int_(1000) + + idx1 = pd.date_range(start='2000', periods=periods, freq='S') + self.assertEqual(len(idx1), periods) + + idx2 = pd.date_range(end='2000', periods=periods, freq='S') + self.assertEqual(len(idx2), periods) + + def test_intersection(self): + first = self.index + second = self.index[5:] + intersect = first.intersection(second) + self.assertTrue(tm.equalContents(intersect, second)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + self.assertTrue(tm.equalContents(result, second)) + + third = Index(['a', 'b', 'c']) + result = first.intersection(third) + expected = pd.Index([], dtype=object) + self.assert_index_equal(result, expected) + + def test_union(self): + first = self.index[:5] + second = self.index[5:] + everything = self.index + union = first.union(second) + self.assertTrue(tm.equalContents(union, everything)) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case) + self.assertTrue(tm.equalContents(result, everything)) + + def test_nat(self): + self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) + + def test_ufunc_coercions(self): + idx = date_range('2011-01-01', periods=3, freq='2D', name='x') + + delta = np.timedelta64(1, 'D') + for 
result in [idx + delta, np.add(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = date_range('2011-01-02', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + for result in [idx - delta, np.subtract(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = date_range('2010-12-31', periods=3, freq='2D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '2D') + + delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), + np.timedelta64(3, 'D')]) + for result in [idx + delta, np.add(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], + freq='3D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '3D') + + for result in [idx - delta, np.subtract(idx, delta)]: + tm.assertIsInstance(result, DatetimeIndex) + exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], + freq='D', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'D') + + def test_fillna_datetime64(self): + # GH 11343 + for tz in ['US/Eastern', 'Asia/Tokyo']: + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00']) + + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00']) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # tz mismatch + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), + pd.Timestamp('2011-01-01 10:00', tz=tz), + pd.Timestamp('2011-01-01 11:00')], dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', + pd.Timestamp('2011-01-01 11:00')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + idx = pd.DatetimeIndex( + ['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz) + + exp = pd.DatetimeIndex( + ['2011-01-01 09:00', 
'2011-01-01 10:00', '2011-01-01 11:00' + ], tz=tz) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + pd.Timestamp('2011-01-01 10:00'), + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + + # object + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + 'x', + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + +class TestPeriodIndex(DatetimeLike, tm.TestCase): + _holder = PeriodIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makePeriodIndex(10)) + self.setup_indices() + + def create_index(self): + return period_range('20130101', periods=5, freq='D') + + def test_shift(self): + + # test shift for PeriodIndex + # GH8083 + drange = self.create_index() + result = drange.shift(1) + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') + self.assert_index_equal(result, expected) + + def test_pickle_compat_construction(self): + pass + + def test_get_loc(self): + idx = pd.period_range('2000-01-01', periods=3) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + self.assertEqual( + idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) + self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) + self.assertEqual( + idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + idx = pd.period_range('2000-01-01', periods=5)[::2] + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance='1 day'), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=pd.Timedelta('1D')), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + 
tolerance=np.timedelta64(1, 'D')), 1) + self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', + tolerance=timedelta(1)), 1) + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc('2000-01-10', method='nearest', tolerance='foo') + + msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' + with tm.assertRaisesRegexp(ValueError, msg): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') + with tm.assertRaises(KeyError): + idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') + + def test_get_indexer(self): + idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') + tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) + + target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', + '2000-01-02T01'], freq='H') + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', tolerance='1 hour'), + [0, -1, 1]) + + msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' + with self.assertRaisesRegexp(ValueError, msg): + idx.get_indexer(target, 'nearest', tolerance='1 minute') + + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', tolerance='1 day'), [0, 1, 1]) + + def test_repeat(self): + # GH10183 + idx = pd.period_range('2000-01-01', periods=3, freq='D') + res = idx.repeat(3) + exp = PeriodIndex(idx.values.repeat(3), freq='D') + self.assert_index_equal(res, exp) + self.assertEqual(res.freqstr, 'D') + + def test_period_index_indexer(self): + + # GH4125 + idx = pd.period_range('2002-01', '2003-12', freq='M') + df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) + self.assert_frame_equal(df, df.ix[idx]) + self.assert_frame_equal(df, df.ix[list(idx)]) + self.assert_frame_equal(df, df.loc[list(idx)]) + 
self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + self.assert_frame_equal(df, df.loc[list(idx)]) + + def test_fillna_period(self): + # GH 11343 + idx = pd.PeriodIndex( + ['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H') + + exp = pd.PeriodIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], freq='H') + self.assert_index_equal( + idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) + + exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', + pd.Period('2011-01-01 11:00', freq='H')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) + + with tm.assertRaisesRegexp( + ValueError, + 'Input has different freq=D from PeriodIndex\\(freq=H\\)'): + idx.fillna(pd.Period('2011-01-01', freq='D')) + + def test_no_millisecond_field(self): + with self.assertRaises(AttributeError): + DatetimeIndex.millisecond + + with self.assertRaises(AttributeError): + DatetimeIndex([]).millisecond + + +class TestTimedeltaIndex(DatetimeLike, tm.TestCase): + _holder = TimedeltaIndex + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=tm.makeTimedeltaIndex(10)) + self.setup_indices() + + def create_index(self): + return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + + def test_shift(self): + # test shift for TimedeltaIndex + # err8083 + + drange = self.create_index() + result = drange.shift(1) + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') + self.assert_index_equal(result, expected) + + result = drange.shift(3, freq='2D 1s') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') + self.assert_index_equal(result, expected) + + def test_get_loc(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(idx[1], method), 1) + 
self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) + self.assertEqual(idx.get_loc(str(idx[1]), method), 1) + + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) + self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) + + with tm.assertRaisesRegexp(ValueError, 'must be convertible'): + idx.get_loc(idx[1], method='nearest', tolerance='foo') + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) + + def test_get_indexer(self): + idx = pd.to_timedelta(['0 days', '1 days', '2 days']) + tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) + + target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), + [0, -1, 1]) + + def test_numeric_compat(self): + + idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + result = idx / 1 + tm.assert_index_equal(result, idx) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + tm.assert_index_equal(result, self._holder(np.arange( + 5, 
dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) + + # invalid + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + + def test_pickle_compat_construction(self): + pass + + def test_ufunc_coercions(self): + # normal ops are also tested in tseries/test_timedeltas.py + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + + for result in [idx * 2, np.multiply(idx, 2)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], + freq='4H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '4H') + + for result in [idx / 2, np.divide(idx, 2)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], + freq='H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, 'H') + + idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], + freq='2H', name='x') + for result in [-idx, np.negative(idx)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], + freq='-2H', name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, '-2H') + + idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], + freq='H', name='x') + for result in [abs(idx), np.absolute(idx)]: + tm.assertIsInstance(result, TimedeltaIndex) + exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], + freq=None, name='x') + tm.assert_index_equal(result, exp) + self.assertEqual(result.freq, None) + + def test_fillna_timedelta(self): + # GH 11343 + idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) + + exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) + self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) + + exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) + idx.fillna(pd.Timedelta('3 hour')) + + exp = pd.Index( + 
[pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) + self.assert_index_equal(idx.fillna('x'), exp) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py new file mode 100644 index 0000000000000..6bc644d84b0d0 --- /dev/null +++ b/pandas/tests/indexes/test_multi.py @@ -0,0 +1,1949 @@ +# -*- coding: utf-8 -*- + +from datetime import timedelta +from itertools import product +import nose +import re +import warnings + +from pandas import (date_range, MultiIndex, Index, CategoricalIndex, + compat) +from pandas.indexes.base import InvalidIndexError +from pandas.compat import range, lrange, u, PY3, long, lzip + +import numpy as np + +from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, + assert_copy) + +import pandas.util.testing as tm + +import pandas as pd +from pandas.lib import Timestamp + +from .common import Base + + +class TestMultiIndex(Base, tm.TestCase): + _holder = MultiIndex + _multiprocess_can_split_ = True + _compat_props = ['shape', 'ndim', 'size', 'itemsize'] + + def setUp(self): + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + self.index_names = ['first', 'second'] + self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels + ], names=self.index_names, + verify_integrity=False)) + self.setup_indices() + + def create_index(self): + return self.index + + def test_boolean_context_compat2(self): + + # boolean context compat + # GH7897 + i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) + common = i1.intersection(i2) + + def f(): + if common: + pass + + tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) + + def test_labels_dtypes(self): + + # GH 8456 + i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) + self.assertTrue(i.labels[0].dtype == 'int8') + 
self.assertTrue(i.labels[1].dtype == 'int8') + + i = MultiIndex.from_product([['a'], range(40)]) + self.assertTrue(i.labels[1].dtype == 'int8') + i = MultiIndex.from_product([['a'], range(400)]) + self.assertTrue(i.labels[1].dtype == 'int16') + i = MultiIndex.from_product([['a'], range(40000)]) + self.assertTrue(i.labels[1].dtype == 'int32') + + i = pd.MultiIndex.from_product([['a'], range(1000)]) + self.assertTrue((i.labels[0] >= 0).all()) + self.assertTrue((i.labels[1] >= 0).all()) + + def test_set_name_methods(self): + # so long as these are synonyms, we don't need to test set_names + self.assertEqual(self.index.rename, self.index.set_names) + new_names = [name + "SUFFIX" for name in self.index_names] + ind = self.index.set_names(new_names) + self.assertEqual(self.index.names, self.index_names) + self.assertEqual(ind.names, new_names) + with assertRaisesRegexp(ValueError, "^Length"): + ind.set_names(new_names + new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] + res = ind.set_names(new_names2, inplace=True) + self.assertIsNone(res) + self.assertEqual(ind.names, new_names2) + + # set names for specific level (# GH7792) + ind = self.index.set_names(new_names[0], level=0) + self.assertEqual(self.index.names, self.index_names) + self.assertEqual(ind.names, [new_names[0], self.index_names[1]]) + + res = ind.set_names(new_names2[0], level=0, inplace=True) + self.assertIsNone(res) + self.assertEqual(ind.names, [new_names2[0], self.index_names[1]]) + + # set names for multiple levels + ind = self.index.set_names(new_names, level=[0, 1]) + self.assertEqual(self.index.names, self.index_names) + self.assertEqual(ind.names, new_names) + + res = ind.set_names(new_names2, level=[0, 1], inplace=True) + self.assertIsNone(res) + self.assertEqual(ind.names, new_names2) + + def test_set_levels(self): + # side note - you probably wouldn't want to use levels and labels + # directly like this - but it is possible. 
+ levels = self.index.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + def assert_matching(actual, expected): + # avoid specifying internal representation + # as much as possible + self.assertEqual(len(actual), len(expected)) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + assert_almost_equal(act, exp) + + # level changing [w/o mutation] + ind2 = self.index.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(self.index.levels, levels) + + # level changing [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.levels, new_levels) + + # level changing specific level [w/o mutation] + ind2 = self.index.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(self.index.levels, levels) + + ind2 = self.index.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(self.index.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = self.index.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(self.index.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(self.index.levels, levels) + + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(self.index.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_levels(new_levels, level=[0, 1], + inplace=True) 
+ self.assertIsNone(inplace_return) + assert_matching(ind2.levels, new_levels) + assert_matching(self.index.levels, levels) + + def test_set_labels(self): + # side note - you probably wouldn't want to use levels and labels + # directly like this - but it is possible. + labels = self.index.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + def assert_matching(actual, expected): + # avoid specifying internal representation + # as much as possible + self.assertEqual(len(actual), len(expected)) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + assert_almost_equal(act, exp) + + # label changing [w/o mutation] + ind2 = self.index.set_labels(new_labels) + assert_matching(ind2.labels, new_labels) + assert_matching(self.index.labels, labels) + + # label changing [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, new_labels) + + # label changing specific level [w/o mutation] + ind2 = self.index.set_labels(new_labels[0], level=0) + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(self.index.labels, labels) + + ind2 = self.index.set_labels(new_labels[1], level=1) + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(self.index.labels, labels) + + # label changing multiple levels [w/o mutation] + ind2 = self.index.set_labels(new_labels, level=[0, 1]) + assert_matching(ind2.labels, new_labels) + assert_matching(self.index.labels, labels) + + # label changing specific level [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, [new_labels[0], labels[1]]) + assert_matching(self.index.labels, labels) + + 
ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, [labels[0], new_labels[1]]) + assert_matching(self.index.labels, labels) + + # label changing multiple levels [w/ mutation] + ind2 = self.index.copy() + inplace_return = ind2.set_labels(new_labels, level=[0, 1], + inplace=True) + self.assertIsNone(inplace_return) + assert_matching(ind2.labels, new_labels) + assert_matching(self.index.labels, labels) + + def test_set_levels_labels_names_bad_input(self): + levels, labels = self.index.levels, self.index.labels + names = self.index.names + + with tm.assertRaisesRegexp(ValueError, 'Length of levels'): + self.index.set_levels([levels[0]]) + + with tm.assertRaisesRegexp(ValueError, 'Length of labels'): + self.index.set_labels([labels[0]]) + + with tm.assertRaisesRegexp(ValueError, 'Length of names'): + self.index.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_labels(labels[0]) + + # shouldn't scalar data error, instead should demand list-like + with tm.assertRaisesRegexp(TypeError, 'list-like'): + self.index.set_names(names[0]) + + # should have equal lengths + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_levels(levels[0], level=[0, 1]) + + with tm.assertRaisesRegexp(TypeError, 'list-like'): + self.index.set_levels(levels, level=0) + + # should have equal lengths + with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): + self.index.set_labels(labels[0], level=[0, 1]) + + with tm.assertRaisesRegexp(TypeError, 'list-like'): + self.index.set_labels(labels, level=0) + + # should have equal lengths + with tm.assertRaisesRegexp(ValueError, 
'Length of names'): + self.index.set_names(names[0], level=[0, 1]) + + with tm.assertRaisesRegexp(TypeError, 'string'): + self.index.set_names(names, level=0) + + def test_metadata_immutable(self): + levels, labels = self.index.levels, self.index.labels + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile('does not support mutable operations') + with assertRaisesRegexp(TypeError, mutable_regex): + levels[0] = levels[0] + with assertRaisesRegexp(TypeError, mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with assertRaisesRegexp(TypeError, mutable_regex): + labels[0] = labels[0] + with assertRaisesRegexp(TypeError, mutable_regex): + labels[0][0] = labels[0][0] + # and for names + names = self.index.names + with assertRaisesRegexp(TypeError, mutable_regex): + names[0] = names[0] + + def test_inplace_mutation_resets_values(self): + levels = [['a', 'b', 'c'], [4]] + levels2 = [[1, 2, 3], ['a']] + labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + mi1 = MultiIndex(levels=levels, labels=labels) + mi2 = MultiIndex(levels=levels2, labels=labels) + vals = mi1.values.copy() + vals2 = mi2.values.copy() + self.assertIsNotNone(mi1._tuples) + + # make sure level setting works + new_vals = mi1.set_levels(levels2).values + assert_almost_equal(vals2, new_vals) + # non-inplace doesn't kill _tuples [implementation detail] + assert_almost_equal(mi1._tuples, vals) + # and values is still same too + assert_almost_equal(mi1.values, vals) + + # inplace should kill _tuples + mi1.set_levels(levels2, inplace=True) + assert_almost_equal(mi1.values, vals2) + + # make sure label setting works too + labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6, ), dtype=object) + exp_values[:] = [(long(1), 'a')] * 6 + # must be 1d array of tuples + self.assertEqual(exp_values.shape, (6, )) + new_values = mi2.set_labels(labels2).values + # not inplace shouldn't change + assert_almost_equal(mi2._tuples, vals2) + # 
should have correct values + assert_almost_equal(exp_values, new_values) + + # and again setting inplace should kill _tuples, etc + mi2.set_labels(labels2, inplace=True) + assert_almost_equal(mi2.values, new_values) + + def test_copy_in_constructor(self): + levels = np.array(["a", "b", "c"]) + labels = np.array([1, 1, 2, 0, 0, 1, 1]) + val = labels[0] + mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], + copy=True) + self.assertEqual(mi.labels[0][0], val) + labels[0] = 15 + self.assertEqual(mi.labels[0][0], val) + val = levels[0] + levels[0] = "PANDA" + self.assertEqual(mi.levels[0][0], val) + + def test_set_value_keeps_names(self): + # motivating example from #3742 + lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] + lev2 = ['1', '2', '3'] * 2 + idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) + df = pd.DataFrame( + np.random.randn(6, 4), + columns=['one', 'two', 'three', 'four'], + index=idx) + df = df.sortlevel() + self.assertIsNone(df.is_copy) + self.assertEqual(df.index.names, ('Name', 'Number')) + df = df.set_value(('grethe', '4'), 'one', 99.34) + self.assertIsNone(df.is_copy) + self.assertEqual(df.index.names, ('Name', 'Number')) + + def test_names(self): + + # names are assigned in __init__ + names = self.index_names + level_names = [level.name for level in self.index.levels] + self.assertEqual(names, level_names) + + # setting bad names on existing + index = self.index + assertRaisesRegexp(ValueError, "^Length of names", setattr, index, + "names", list(index.names) + ["third"]) + assertRaisesRegexp(ValueError, "^Length of names", setattr, index, + "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = self.index.levels + major_labels, minor_labels = self.index.labels + assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first']) + assertRaisesRegexp(ValueError, 
"^Length of names", MultiIndex, + levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels], + names=['first', 'second', 'third']) + + # names are assigned + index.names = ["a", "b"] + ind_names = list(index.names) + level_names = [level.name for level in index.levels] + self.assertEqual(ind_names, level_names) + + def test_reference_duplicate_name(self): + idx = MultiIndex.from_tuples( + [('a', 'b'), ('c', 'd')], names=['x', 'x']) + self.assertTrue(idx._reference_duplicate_name('x')) + + idx = MultiIndex.from_tuples( + [('a', 'b'), ('c', 'd')], names=['x', 'y']) + self.assertFalse(idx._reference_duplicate_name('x')) + + def test_astype(self): + expected = self.index.copy() + actual = self.index.astype('O') + assert_copy(actual.levels, expected.levels) + assert_copy(actual.labels, expected.labels) + self.check_level_names(actual, expected.names) + + with assertRaisesRegexp(TypeError, "^Setting.*dtype.*object"): + self.index.astype(np.dtype(int)) + + def test_constructor_single_level(self): + single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + tm.assertIsInstance(single_level, Index) + self.assertNotIsInstance(single_level, MultiIndex) + self.assertEqual(single_level.name, 'first') + + single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]]) + self.assertIsNone(single_level.name) + + def test_constructor_no_levels(self): + assertRaisesRegexp(ValueError, "non-zero number of levels/labels", + MultiIndex, levels=[], labels=[]) + both_re = re.compile('Must pass both levels and labels') + with tm.assertRaisesRegexp(TypeError, both_re): + MultiIndex(levels=[]) + with tm.assertRaisesRegexp(TypeError, both_re): + MultiIndex(labels=[]) + + def test_constructor_mismatched_label_levels(self): + labels = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + assertRaisesRegexp(ValueError, "Length of levels and labels must be" + " the same", MultiIndex, levels=levels, 
+ labels=labels) + length_error = re.compile('>= length of level') + label_error = re.compile(r'Unequal label lengths: \[4, 2\]') + + # important to check that it's looking at the right thing. + with tm.assertRaisesRegexp(ValueError, length_error): + MultiIndex(levels=[['a'], ['b']], + labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with tm.assertRaisesRegexp(ValueError, label_error): + MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) + + # external API + with tm.assertRaisesRegexp(ValueError, length_error): + self.index.copy().set_levels([['a'], ['b']]) + + with tm.assertRaisesRegexp(ValueError, label_error): + self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) + + # deprecated properties + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + + with tm.assertRaisesRegexp(ValueError, length_error): + self.index.copy().levels = [['a'], ['b']] + + with tm.assertRaisesRegexp(ValueError, label_error): + self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] + + def assert_multiindex_copied(self, copy, original): + # levels shoudl be (at least, shallow copied) + assert_copy(copy.levels, original.levels) + + assert_almost_equal(copy.labels, original.labels) + + # labels doesn't matter which way copied + assert_almost_equal(copy.labels, original.labels) + self.assertIsNot(copy.labels, original.labels) + + # names doesn't matter which way copied + self.assertEqual(copy.names, original.names) + self.assertIsNot(copy.names, original.names) + + # sort order should be copied + self.assertEqual(copy.sortorder, original.sortorder) + + def test_copy(self): + i_copy = self.index.copy() + + self.assert_multiindex_copied(i_copy, self.index) + + def test_shallow_copy(self): + i_copy = self.index._shallow_copy() + + self.assert_multiindex_copied(i_copy, self.index) + + def test_view(self): + i_view = self.index.view() + + self.assert_multiindex_copied(i_view, self.index) + + def check_level_names(self, index, names): + self.assertEqual([level.name for level in 
index.levels], list(names)) + + def test_changing_names(self): + + # names should be applied to levels + level_names = [level.name for level in self.index.levels] + self.check_level_names(self.index, self.index.names) + + view = self.index.view() + copy = self.index.copy() + shallow_copy = self.index._shallow_copy() + + # changing names should change level names on object + new_names = [name + "a" for name in self.index.names] + self.index.names = new_names + self.check_level_names(self.index, new_names) + + # but not on copies + self.check_level_names(view, level_names) + self.check_level_names(copy, level_names) + self.check_level_names(shallow_copy, level_names) + + # and copies shouldn't change original + shallow_copy.names = [name + "c" for name in shallow_copy.names] + self.check_level_names(self.index, new_names) + + def test_duplicate_names(self): + self.index.names = ['foo', 'foo'] + assertRaisesRegexp(KeyError, 'Level foo not found', + self.index._get_level_number, 'foo') + + def test_get_level_number_integer(self): + self.index.names = [1, 0] + self.assertEqual(self.index._get_level_number(1), 0) + self.assertEqual(self.index._get_level_number(0), 1) + self.assertRaises(IndexError, self.index._get_level_number, 2) + assertRaisesRegexp(KeyError, 'Level fourth not found', + self.index._get_level_number, 'fourth') + + def test_from_arrays(self): + arrays = [] + for lev, lab in zip(self.index.levels, self.index.labels): + arrays.append(np.asarray(lev).take(lab)) + + result = MultiIndex.from_arrays(arrays) + self.assertEqual(list(result), list(self.index)) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], + ['a', 'b']]) + self.assertTrue(result.levels[0].equals(Index([Timestamp('20130101') + ]))) + self.assertTrue(result.levels[1].equals(Index(['a', 'b']))) + + def test_from_product(self): + + first = ['foo', 'bar', 'buz'] + second = ['a', 'b', 'c'] + names = ['first', 'second'] + result = 
MultiIndex.from_product([first, second], names=names) + + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_numpy_array_equal(result, expected) + self.assertEqual(result.names, names) + + def test_from_product_datetimeindex(self): + dt_index = date_range('2000-01-01', periods=2) + mi = pd.MultiIndex.from_product([[1, 2], dt_index]) + etalon = pd.lib.list_to_object_array([(1, pd.Timestamp( + '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( + '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) + tm.assert_numpy_array_equal(mi.values, etalon) + + def test_values_boxed(self): + tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), + (3, pd.Timestamp('2000-01-03')), + (1, pd.Timestamp('2000-01-04')), + (2, pd.Timestamp('2000-01-02')), + (3, pd.Timestamp('2000-01-03'))] + mi = pd.MultiIndex.from_tuples(tuples) + tm.assert_numpy_array_equal(mi.values, + pd.lib.list_to_object_array(tuples)) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) + + def test_append(self): + result = self.index[:3].append(self.index[3:]) + self.assertTrue(result.equals(self.index)) + + foos = [self.index[:1], self.index[1:3], self.index[3:]] + result = foos[0].append(foos[1:]) + self.assertTrue(result.equals(self.index)) + + # empty + result = self.index.append([]) + self.assertTrue(result.equals(self.index)) + + def test_get_level_values(self): + result = self.index.get_level_values(0) + expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux'] + tm.assert_numpy_array_equal(result, expected) + + self.assertEqual(result.name, 'first') + + result = self.index.get_level_values('first') + expected = self.index.get_level_values(0) + tm.assert_numpy_array_equal(result, expected) + + # GH 10460 + index = MultiIndex(levels=[CategoricalIndex( + ['A', 
'B']), CategoricalIndex([1, 2, 3])], labels=[np.array( + [0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])]) + exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) + self.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + self.assert_index_equal(index.get_level_values(1), exp) + + def test_get_level_values_na(self): + arrays = [['a', 'b', 'b'], [1, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(1) + expected = [1, np.nan, 2] + tm.assert_numpy_array_equal(values.values.astype(float), expected) + + arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(1) + expected = [np.nan, np.nan, 2] + tm.assert_numpy_array_equal(values.values.astype(float), expected) + + arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(0) + expected = [np.nan, np.nan, np.nan] + tm.assert_numpy_array_equal(values.values.astype(float), expected) + values = index.get_level_values(1) + expected = np.array(['a', np.nan, 1], dtype=object) + tm.assert_numpy_array_equal(values.values, expected) + + arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_numpy_array_equal(values.values, expected.values) + + arrays = [[], []] + index = pd.MultiIndex.from_arrays(arrays) + values = index.get_level_values(0) + self.assertEqual(values.shape, (0, )) + + def test_reorder_levels(self): + # this blows up + assertRaisesRegexp(IndexError, '^Too many levels', + self.index.reorder_levels, [2, 1, 0]) + + def test_nlevels(self): + self.assertEqual(self.index.nlevels, 2) + + def test_iter(self): + result = list(self.index) + expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] + 
self.assertEqual(result, expected) + + def test_legacy_pickle(self): + if PY3: + raise nose.SkipTest("testing for legacy pickles not " + "support on py3") + + path = tm.get_data_path('multiindex_v1.pickle') + obj = pd.read_pickle(path) + + obj2 = MultiIndex.from_tuples(obj.values) + self.assertTrue(obj.equals(obj2)) + + res = obj.get_indexer(obj) + exp = np.arange(len(obj)) + assert_almost_equal(res, exp) + + res = obj.get_indexer(obj2[::-1]) + exp = obj.get_indexer(obj[::-1]) + exp2 = obj2.get_indexer(obj2[::-1]) + assert_almost_equal(res, exp) + assert_almost_equal(exp, exp2) + + def test_legacy_v2_unpickle(self): + + # 0.7.3 -> 0.8.0 format manage + path = tm.get_data_path('mindex_073.pickle') + obj = pd.read_pickle(path) + + obj2 = MultiIndex.from_tuples(obj.values) + self.assertTrue(obj.equals(obj2)) + + res = obj.get_indexer(obj) + exp = np.arange(len(obj)) + assert_almost_equal(res, exp) + + res = obj.get_indexer(obj2[::-1]) + exp = obj.get_indexer(obj[::-1]) + exp2 = obj2.get_indexer(obj2[::-1]) + assert_almost_equal(res, exp) + assert_almost_equal(exp, exp2) + + def test_roundtrip_pickle_with_tz(self): + + # GH 8367 + # round-trip of timezone + index = MultiIndex.from_product( + [[1, 2], ['a', 'b'], date_range('20130101', periods=3, + tz='US/Eastern') + ], names=['one', 'two', 'three']) + unpickled = self.round_trip_pickle(index) + self.assertTrue(index.equal_levels(unpickled)) + + def test_from_tuples_index_values(self): + result = MultiIndex.from_tuples(self.index) + self.assertTrue((result.values == self.index.values).all()) + + def test_contains(self): + self.assertIn(('foo', 'two'), self.index) + self.assertNotIn(('bar', 'two'), self.index) + self.assertNotIn(None, self.index) + + def test_is_all_dates(self): + self.assertFalse(self.index.is_all_dates) + + def test_is_numeric(self): + # MultiIndex is never numeric + self.assertFalse(self.index.is_numeric()) + + def test_getitem(self): + # scalar + self.assertEqual(self.index[2], ('bar', 'one')) + + # 
slice + result = self.index[2:5] + expected = self.index[[2, 3, 4]] + self.assertTrue(result.equals(expected)) + + # boolean + result = self.index[[True, False, True, False, True, True]] + result2 = self.index[np.array([True, False, True, False, True, True])] + expected = self.index[[0, 2, 4, 5]] + self.assertTrue(result.equals(expected)) + self.assertTrue(result2.equals(expected)) + + def test_getitem_group_select(self): + sorted_idx, _ = self.index.sortlevel(0) + self.assertEqual(sorted_idx.get_loc('baz'), slice(3, 4)) + self.assertEqual(sorted_idx.get_loc('foo'), slice(0, 2)) + + def test_get_loc(self): + self.assertEqual(self.index.get_loc(('foo', 'two')), 1) + self.assertEqual(self.index.get_loc(('baz', 'two')), 3) + self.assertRaises(KeyError, self.index.get_loc, ('bar', 'two')) + self.assertRaises(KeyError, self.index.get_loc, 'quux') + + self.assertRaises(NotImplementedError, self.index.get_loc, 'foo', + method='nearest') + + # 3 levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + self.assertRaises(KeyError, index.get_loc, (1, 1)) + self.assertEqual(index.get_loc((2, 0)), slice(3, 5)) + + def test_get_loc_duplicates(self): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + self.assertEqual(result, expected) + # self.assertRaises(Exception, index.get_loc, 2) + + index = Index(['c', 'a', 'a', 'b', 'b']) + rs = index.get_loc('c') + xp = 0 + assert (rs == xp) + + def test_get_loc_level(self): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + self.assertEqual(loc, expected) + 
self.assertTrue(new_index.equals(exp_index)) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + self.assertEqual(loc, expected) + self.assertIsNone(new_index) + + self.assertRaises(KeyError, index.get_loc_level, (2, 2)) + + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + self.assertEqual(result, expected) + self.assertTrue(new_index.equals(index.droplevel(0))) + + def test_slice_locs(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30), + df.index[15] - timedelta(seconds=30))) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + def test_slice_locs_with_type_mismatch(self): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, + (1, 3)) + assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, + df.index[5] + timedelta(seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with assertRaisesRegexp(TypeError, '^Level type mismatch'): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with assertRaisesRegexp(TypeError, '^Level type mismatch'): + idx.slice_locs(df.index[1], (16, "a")) + + def test_slice_locs_not_sorted(self): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + 
assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex" + " lexsort depth", index.slice_locs, (1, 0, 1), + (2, 1, 0)) + + # works + sorted_index, _ = index.sortlevel(0) + # should there be a test case here??? + sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + + def test_slice_locs_partial(self): + sorted_idx, _ = self.index.sortlevel(0) + + result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) + self.assertEqual(result, (1, 5)) + + result = sorted_idx.slice_locs(None, ('qux', 'one')) + self.assertEqual(result, (0, 5)) + + result = sorted_idx.slice_locs(('foo', 'two'), None) + self.assertEqual(result, (1, len(sorted_idx))) + + result = sorted_idx.slice_locs('bar', 'baz') + self.assertEqual(result, (2, 4)) + + def test_slice_locs_not_contained(self): + # some searchsorted action + + index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], + labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], + [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) + + result = index.slice_locs((1, 0), (5, 2)) + self.assertEqual(result, (3, 6)) + + result = index.slice_locs(1, 5) + self.assertEqual(result, (3, 6)) + + result = index.slice_locs((2, 2), (5, 2)) + self.assertEqual(result, (3, 6)) + + result = index.slice_locs(2, 5) + self.assertEqual(result, (3, 6)) + + result = index.slice_locs((1, 0), (6, 3)) + self.assertEqual(result, (3, 8)) + + result = index.slice_locs(-1, 10) + self.assertEqual(result, (0, len(index))) + + def test_consistency(self): + # need to construct an overflow + major_axis = lrange(70000) + minor_axis = lrange(10) + + major_labels = np.arange(70000) + minor_labels = np.repeat(lrange(10), 7000) + + # the fact that is works means it's consistent + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + # inconsistent + major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + 
self.assertFalse(index.is_unique) + + def test_truncate(self): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + + result = index.truncate(before=1) + self.assertNotIn('foo', result.levels[0]) + self.assertIn(1, result.levels[0]) + + result = index.truncate(after=1) + self.assertNotIn(2, result.levels[0]) + self.assertIn(1, result.levels[0]) + + result = index.truncate(before=1, after=2) + self.assertEqual(len(result.levels[0]), 2) + + # after < before + self.assertRaises(ValueError, index.truncate, 3, 1) + + def test_get_indexer(self): + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, [1, 3, -1]) + + r1 = idx2.get_indexer(idx1, method='pad') + e1 = [-1, 0, 0, 1, 1] + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='pad') + assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method='ffill') + assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method='backfill') + e1 = [0, 0, 1, 1, 2] + assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method='backfill') + assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method='bfill') + assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2._tuple_index) + rexp1 = idx1.get_indexer(idx2) + assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + self.assertTrue((r1 == [-1, -1, -1]).all()) + + # create index with duplicates + idx1 = Index(lrange(10) + lrange(10)) + idx2 = 
Index(lrange(20)) + assertRaisesRegexp(InvalidIndexError, "Reindexing only valid with" + " uniquely valued Index objects", idx1.get_indexer, + idx2) + + def test_get_indexer_nearest(self): + midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) + with tm.assertRaises(NotImplementedError): + midx.get_indexer(['a'], method='nearest') + with tm.assertRaises(NotImplementedError): + midx.get_indexer(['a'], method='pad', tolerance=2) + + def test_format(self): + self.index.format() + self.index[:0].format() + + def test_format_integer_names(self): + index = MultiIndex(levels=[[0, 1], [0, 1]], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) + index.format(names=True) + + def test_format_sparse_display(self): + index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], + labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) + + result = index.format() + self.assertEqual(result[3], '1 0 0 0') + + def test_format_sparse_config(self): + warn_filters = warnings.filters + warnings.filterwarnings('ignore', category=FutureWarning, + module=".*format") + # GH1538 + pd.set_option('display.multi_sparse', False) + + result = self.index.format() + self.assertEqual(result[1], 'foo two') + + self.reset_display_options() + + warnings.filters = warn_filters + + def test_to_hierarchical(self): + index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two')]) + result = index.to_hierarchical(3) + expected = MultiIndex(levels=[[1, 2], ['one', 'two']], + labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], + [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) + tm.assert_index_equal(result, expected) + self.assertEqual(result.names, index.names) + + # K > 1 + result = index.to_hierarchical(3, 2) + expected = MultiIndex(levels=[[1, 2], ['one', 'two']], + labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], + [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) + tm.assert_index_equal(result, expected) + self.assertEqual(result.names, index.names) + + # non-sorted + index = 
MultiIndex.from_tuples([(2, 'c'), (1, 'b'), + (2, 'a'), (2, 'b')], + names=['N1', 'N2']) + + result = index.to_hierarchical(2) + expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), + (1, 'b'), + (2, 'a'), (2, 'a'), + (2, 'b'), (2, 'b')], + names=['N1', 'N2']) + tm.assert_index_equal(result, expected) + self.assertEqual(result.names, index.names) + + def test_bounds(self): + self.index._bounds + + def test_equals(self): + self.assertTrue(self.index.equals(self.index)) + self.assertTrue(self.index.equal_levels(self.index)) + + self.assertFalse(self.index.equals(self.index[:-1])) + + self.assertTrue(self.index.equals(self.index._tuple_index)) + + # different number of levels + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) + self.assertFalse(index.equals(index2)) + self.assertFalse(index.equal_levels(index2)) + + # levels are different + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) + + major_labels = np.array([0, 0, 1, 2, 2, 3]) + minor_labels = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + self.assertFalse(self.index.equals(index)) + self.assertFalse(self.index.equal_levels(index)) + + # some of the labels are different + major_axis = Index(['foo', 'bar', 'baz', 'qux']) + minor_axis = Index(['one', 'two']) + + major_labels = np.array([0, 0, 2, 2, 3, 3]) + minor_labels = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels]) + self.assertFalse(self.index.equals(index)) + + def test_identical(self): + mi = self.index.copy() + mi2 = self.index.copy() + self.assertTrue(mi.identical(mi2)) + + mi = mi.set_names(['new1', 'new2']) + self.assertTrue(mi.equals(mi2)) + 
self.assertFalse(mi.identical(mi2)) + + mi2 = mi2.set_names(['new1', 'new2']) + self.assertTrue(mi.identical(mi2)) + + mi3 = Index(mi.tolist(), names=mi.names) + mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) + self.assertTrue(mi.identical(mi3)) + self.assertFalse(mi.identical(mi4)) + self.assertTrue(mi.equals(mi4)) + + def test_is_(self): + mi = MultiIndex.from_tuples(lzip(range(10), range(10))) + self.assertTrue(mi.is_(mi)) + self.assertTrue(mi.is_(mi.view())) + self.assertTrue(mi.is_(mi.view().view().view().view())) + mi2 = mi.view() + # names are metadata, they don't change id + mi2.names = ["A", "B"] + self.assertTrue(mi2.is_(mi)) + self.assertTrue(mi.is_(mi2)) + + self.assertTrue(mi.is_(mi.set_names(["C", "D"]))) + mi2 = mi.view() + mi2.set_names(["E", "F"], inplace=True) + self.assertTrue(mi.is_(mi2)) + # levels are inherent properties, they change identity + mi3 = mi2.set_levels([lrange(10), lrange(10)]) + self.assertFalse(mi3.is_(mi2)) + # shouldn't change + self.assertTrue(mi2.is_(mi)) + mi4 = mi3.view() + mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True) + self.assertFalse(mi4.is_(mi3)) + mi5 = mi.view() + mi5.set_levels(mi5.levels, inplace=True) + self.assertFalse(mi5.is_(mi)) + + def test_union(self): + piece1 = self.index[:5][::-1] + piece2 = self.index[3:] + + the_union = piece1 | piece2 + + tups = sorted(self.index._tuple_index) + expected = MultiIndex.from_tuples(tups) + + self.assertTrue(the_union.equals(expected)) + + # corner case, pass self or empty thing: + the_union = self.index.union(self.index) + self.assertIs(the_union, self.index) + + the_union = self.index.union(self.index[:0]) + self.assertIs(the_union, self.index) + + # won't work in python 3 + # tuples = self.index._tuple_index + # result = self.index[:4] | tuples[4:] + # self.assertTrue(result.equals(tuples)) + + # not valid for python 3 + # def test_union_with_regular_index(self): + # other = Index(['A', 'B', 'C']) + + # result = 
other.union(self.index) + # self.assertIn(('foo', 'one'), result) + # self.assertIn('B', result) + + # result2 = self.index.union(other) + # self.assertTrue(result.equals(result2)) + + def test_intersection(self): + piece1 = self.index[:5][::-1] + piece2 = self.index[3:] + + the_int = piece1 & piece2 + tups = sorted(self.index[3:5]._tuple_index) + expected = MultiIndex.from_tuples(tups) + self.assertTrue(the_int.equals(expected)) + + # corner case, pass self + the_int = self.index.intersection(self.index) + self.assertIs(the_int, self.index) + + # empty intersection: disjoint + empty = self.index[:2] & self.index[2:] + expected = self.index[:0] + self.assertTrue(empty.equals(expected)) + + # can't do in python 3 + # tuples = self.index._tuple_index + # result = self.index & tuples + # self.assertTrue(result.equals(tuples)) + + def test_difference(self): + + first = self.index + result = first.difference(self.index[-3:]) + + # - API change GH 8226 + with tm.assert_produces_warning(): + first - self.index[-3:] + with tm.assert_produces_warning(): + self.index[-3:] - first + with tm.assert_produces_warning(): + self.index[-3:] - first.tolist() + + self.assertRaises(TypeError, lambda: first.tolist() - self.index[-3:]) + + expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), + sortorder=0, + names=self.index.names) + + tm.assertIsInstance(result, MultiIndex) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.names, self.index.names) + + # empty difference: reflexive + result = self.index.difference(self.index) + expected = self.index[:0] + self.assertTrue(result.equals(expected)) + self.assertEqual(result.names, self.index.names) + + # empty difference: superset + result = self.index[-3:].difference(self.index) + expected = self.index[:0] + self.assertTrue(result.equals(expected)) + self.assertEqual(result.names, self.index.names) + + # empty difference: degenerate + result = self.index[:0].difference(self.index) + expected = 
self.index[:0] + self.assertTrue(result.equals(expected)) + self.assertEqual(result.names, self.index.names) + + # names not the same + chunklet = self.index[-3:] + chunklet.names = ['foo', 'baz'] + result = first.difference(chunklet) + self.assertEqual(result.names, (None, None)) + + # empty, but non-equal + result = self.index.difference(self.index.sortlevel(1)[0]) + self.assertEqual(len(result), 0) + + # raise Exception called with non-MultiIndex + result = first.difference(first._tuple_index) + self.assertTrue(result.equals(first[:0])) + + # name from empty array + result = first.difference([]) + self.assertTrue(first.equals(result)) + self.assertEqual(first.names, result.names) + + # name from non-empty array + result = first.difference([('foo', 'one')]) + expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( + 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) + expected.names = first.names + self.assertEqual(first.names, result.names) + assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list" + " of tuples", first.difference, [1, 2, 3, 4, 5]) + + def test_from_tuples(self): + assertRaisesRegexp(TypeError, 'Cannot infer number of levels from' + ' empty list', MultiIndex.from_tuples, []) + + idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) + self.assertEqual(len(idx), 2) + + def test_argsort(self): + result = self.index.argsort() + expected = self.index._tuple_index.argsort() + tm.assert_numpy_array_equal(result, expected) + + def test_sortlevel(self): + import random + + tuples = list(self.index) + random.shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + self.assertTrue(sorted_idx.equals(expected)) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + self.assertTrue(sorted_idx.equals(expected[::-1])) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = 
MultiIndex.from_tuples(by1) + self.assertTrue(sorted_idx.equals(expected)) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + self.assertTrue(sorted_idx.equals(expected[::-1])) + + def test_sortlevel_not_sort_remaining(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) + self.assertTrue(sorted_idx.equals(mi)) + + def test_sortlevel_deterministic(self): + tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), + ('foo', 'one'), ('baz', 'two'), ('qux', 'one')] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + self.assertTrue(sorted_idx.equals(expected)) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + self.assertTrue(sorted_idx.equals(expected[::-1])) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + self.assertTrue(sorted_idx.equals(expected)) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + self.assertTrue(sorted_idx.equals(expected[::-1])) + + def test_dims(self): + pass + + def test_drop(self): + dropped = self.index.drop([('foo', 'two'), ('qux', 'one')]) + + index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) + dropped2 = self.index.drop(index) + + expected = self.index[[0, 2, 3, 5]] + self.assert_index_equal(dropped, expected) + self.assert_index_equal(dropped2, expected) + + dropped = self.index.drop(['bar']) + expected = self.index[[0, 1, 3, 4, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop('foo') + expected = self.index[[2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([('bar', 'two')]) + self.assertRaises(KeyError, self.index.drop, [('bar', 'two')]) + self.assertRaises(KeyError, self.index.drop, index) + self.assertRaises(KeyError, self.index.drop, ['foo', 'two']) + + # partially correct 
argument + mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) + self.assertRaises(KeyError, self.index.drop, mixed_index) + + # error='ignore' + dropped = self.index.drop(index, errors='ignore') + expected = self.index[[0, 1, 2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop(mixed_index, errors='ignore') + expected = self.index[[0, 1, 2, 3, 5]] + self.assert_index_equal(dropped, expected) + + dropped = self.index.drop(['foo', 'two'], errors='ignore') + expected = self.index[[2, 3, 4, 5]] + self.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = self.index.drop(['foo', ('qux', 'one')]) + expected = self.index[[2, 3, 5]] + self.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ['foo', ('qux', 'one'), 'two'] + self.assertRaises(KeyError, self.index.drop, mixed_index) + dropped = self.index.drop(mixed_index, errors='ignore') + expected = self.index[[2, 3, 5]] + self.assert_index_equal(dropped, expected) + + def test_droplevel_with_names(self): + index = self.index[self.index.get_loc('foo')] + dropped = index.droplevel(0) + self.assertEqual(dropped.name, 'second') + + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) + dropped = index.droplevel(0) + self.assertEqual(dropped.names, ('two', 'three')) + + dropped = index.droplevel('two') + expected = index.droplevel(1) + self.assertTrue(dropped.equals(expected)) + + def test_droplevel_multiple(self): + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) + + dropped = index[:2].droplevel(['three', 'one']) + expected = 
index[:2].droplevel(2).droplevel(0) + self.assertTrue(dropped.equals(expected)) + + def test_insert(self): + # key contained in all levels + new_index = self.index.insert(0, ('bar', 'two')) + self.assertTrue(new_index.equal_levels(self.index)) + self.assertEqual(new_index[0], ('bar', 'two')) + + # key not contained in all levels + new_index = self.index.insert(0, ('abc', 'three')) + tm.assert_numpy_array_equal(new_index.levels[0], + list(self.index.levels[0]) + ['abc']) + tm.assert_numpy_array_equal(new_index.levels[1], + list(self.index.levels[1]) + ['three']) + self.assertEqual(new_index[0], ('abc', 'three')) + + # key wrong length + assertRaisesRegexp(ValueError, "Item must have length equal to number" + " of levels", self.index.insert, 0, ('foo2', )) + + left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], + columns=['1st', '2nd', '3rd']) + left.set_index(['1st', '2nd'], inplace=True) + ts = left['3rd'].copy(deep=True) + + left.loc[('b', 'x'), '3rd'] = 2 + left.loc[('b', 'a'), '3rd'] = -1 + left.loc[('b', 'b'), '3rd'] = 3 + left.loc[('a', 'x'), '3rd'] = 4 + left.loc[('a', 'w'), '3rd'] = 5 + left.loc[('a', 'a'), '3rd'] = 6 + + ts.loc[('b', 'x')] = 2 + ts.loc['b', 'a'] = -1 + ts.loc[('b', 'b')] = 3 + ts.loc['a', 'x'] = 4 + ts.loc[('a', 'w')] = 5 + ts.loc['a', 'a'] = 6 + + right = pd.DataFrame([['a', 'b', 0], + ['b', 'd', 1], + ['b', 'x', 2], + ['b', 'a', -1], + ['b', 'b', 3], + ['a', 'x', 4], + ['a', 'w', 5], + ['a', 'a', 6]], + columns=['1st', '2nd', '3rd']) + right.set_index(['1st', '2nd'], inplace=True) + # FIXME data types changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right['3rd']) + + # GH9250 + idx = [('test1', i) for i in range(5)] + \ + [('test2', i) for i in range(6)] + \ + [('test', 17), ('test', 18)] + + left = pd.Series(np.linspace(0, 10, 11), + pd.MultiIndex.from_tuples(idx[:-2])) + + left.loc[('test', 17)] = 11 + left.ix[('test', 18)] = 12 + + right = 
pd.Series(np.linspace(0, 12, 13), + pd.MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + def test_take_preserve_name(self): + taken = self.index.take([3, 0, 1]) + self.assertEqual(taken.names, self.index.names) + + def test_join_level(self): + def _check_how(other, how): + join_index, lidx, ridx = other.join(self.index, how=how, + level='second', + return_indexers=True) + + exp_level = other.join(self.index.levels[1], how=how) + self.assertTrue(join_index.levels[0].equals(self.index.levels[0])) + self.assertTrue(join_index.levels[1].equals(exp_level)) + + # pare down levels + mask = np.array( + [x[1] in exp_level for x in self.index], dtype=bool) + exp_values = self.index.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if how in ('outer', 'inner'): + join_index2, ridx2, lidx2 = \ + self.index.join(other, how=how, level='second', + return_indexers=True) + + self.assertTrue(join_index.equals(join_index2)) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + def _check_all(other): + _check_how(other, 'outer') + _check_how(other, 'inner') + _check_how(other, 'left') + _check_how(other, 'right') + + _check_all(Index(['three', 'one', 'two'])) + _check_all(Index(['one'])) + _check_all(Index(['one', 'three'])) + + # some corner cases + idx = Index(['three', 'one', 'two']) + result = idx.join(self.index, level='second') + tm.assertIsInstance(result, MultiIndex) + + assertRaisesRegexp(TypeError, "Join.*MultiIndex.*ambiguous", + self.index.join, self.index, level=1) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + res = self.index + joined = res.join(res, how=kind) + self.assertIs(res, joined) + + def test_join_multi(self): + # GH 10665 + midx = pd.MultiIndex.from_product( + [np.arange(4), np.arange(4)], names=['a', 'b']) + idx = pd.Index([1, 2, 5], name='b') + + # inner + jidx, 
lidx, ridx = midx.join(idx, how='inner', return_indexers=True) + exp_idx = pd.MultiIndex.from_product( + [np.arange(4), [1, 2]], names=['a', 'b']) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14]) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1]) + self.assert_index_equal(jidx, exp_idx) + self.assert_numpy_array_equal(lidx, exp_lidx) + self.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True) + self.assert_index_equal(jidx, exp_idx) + self.assert_numpy_array_equal(lidx, exp_lidx) + self.assert_numpy_array_equal(ridx, exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) + exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, + 1, -1]) + self.assert_index_equal(jidx, midx) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) + self.assert_index_equal(jidx, midx) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, exp_ridx) + + def test_reindex(self): + result, indexer = self.index.reindex(list(self.index[:4])) + tm.assertIsInstance(result, MultiIndex) + self.check_level_names(result, self.index[:4].names) + + result, indexer = self.index.reindex(list(self.index)) + tm.assertIsInstance(result, MultiIndex) + self.assertIsNone(indexer) + self.check_level_names(result, self.index.names) + + def test_reindex_level(self): + idx = Index(['one']) + + target, indexer = self.index.reindex(idx, level='second') + target2, indexer2 = idx.reindex(self.index, level='second') + + exp_index = self.index.join(idx, level='second', how='right') + exp_index2 = self.index.join(idx, level='second', how='left') + + self.assertTrue(target.equals(exp_index)) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer) + + self.assertTrue(target2.equals(exp_index2)) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + 
tm.assert_numpy_array_equal(indexer2, exp_indexer2) + + assertRaisesRegexp(TypeError, "Fill method not supported", + self.index.reindex, self.index, method='pad', + level='second') + + assertRaisesRegexp(TypeError, "Fill method not supported", idx.reindex, + idx, method='bfill', level='first') + + def test_duplicates(self): + self.assertFalse(self.index.has_duplicates) + self.assertTrue(self.index.append(self.index).has_duplicates) + + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) + self.assertTrue(index.has_duplicates) + + # GH 9075 + t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), + (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), + (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), + (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), + (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), + (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), + (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), + (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), + (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), + (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), + (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), + (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), + (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), + (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), + (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), + (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), + (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), + (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] + + index = pd.MultiIndex.from_tuples(t) + self.assertFalse(index.has_duplicates) + + # handle int64 overflow if possible + def check(nlevels, with_nulls): + labels = np.tile(np.arange(500), 2) + level = np.arange(500) + + if with_nulls: # inject 
some null values + labels[500] = -1 # common nan value + labels = list(labels.copy() for i in range(nlevels)) + for i in range(nlevels): + labels[i][500 + i - nlevels // 2] = -1 + + labels += [np.array([-1, 1]).repeat(500)] + else: + labels = [labels] * nlevels + [np.arange(2).repeat(500)] + + levels = [level] * nlevels + [[0, 1]] + + # no dups + index = MultiIndex(levels=levels, labels=labels) + self.assertFalse(index.has_duplicates) + + # with a dup + if with_nulls: + f = lambda a: np.insert(a, 1000, a[0]) + labels = list(map(f, labels)) + index = MultiIndex(levels=levels, labels=labels) + else: + values = index.values.tolist() + index = MultiIndex.from_tuples(values + [values[0]]) + + self.assertTrue(index.has_duplicates) + + # no overflow + check(4, False) + check(4, True) + + # overflow possible + check(8, False) + check(8, True) + + # GH 9125 + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] + labels = [np.random.choice(n, k * n) for lev in levels] + mi = MultiIndex(levels=levels, labels=labels) + + for keep in ['first', 'last', False]: + left = mi.duplicated(keep=keep) + right = pd.lib.duplicated(mi.values, keep=keep) + tm.assert_numpy_array_equal(left, right) + + # GH5873 + for a in [101, 102]: + mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) + self.assertFalse(mi.has_duplicates) + self.assertEqual(mi.get_duplicates(), []) + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + 2, dtype='bool')) + + for n in range(1, 6): # 1st level shape + for m in range(1, 5): # 2nd level shape + # all possible unique combinations, including nan + lab = product(range(-1, n), range(-1, m)) + mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], + labels=np.random.permutation(list(lab)).T) + self.assertEqual(len(mi), (n + 1) * (m + 1)) + self.assertFalse(mi.has_duplicates) + self.assertEqual(mi.get_duplicates(), []) + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + len(mi), dtype='bool')) + + def 
test_duplicate_meta_data(self): + # GH 10115 + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) + for idx in [index, + index.set_names([None, None]), + index.set_names([None, 'Num']), + index.set_names(['Upper', 'Num']), ]: + self.assertTrue(idx.has_duplicates) + self.assertEqual(idx.drop_duplicates().names, idx.names) + + def test_tolist(self): + result = self.index.tolist() + exp = list(self.index.values) + self.assertEqual(result, exp) + + def test_repr_with_unicode_data(self): + with pd.core.config.option_context("display.encoding", 'UTF-8'): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + self.assertFalse("\\u" in repr(index) + ) # we don't want unicode-escaped + + def test_repr_roundtrip(self): + + mi = MultiIndex.from_product([list('ab'), range(3)], + names=['first', 'second']) + str(mi) + + if PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + self.assertEqual( + mi.get_level_values('first').inferred_type, 'string') + self.assertEqual( + result.get_level_values('first').inferred_type, 'unicode') + + mi_u = MultiIndex.from_product( + [list(u'ab'), range(3)], names=['first', 'second']) + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + + # formatting + if PY3: + str(mi) + else: + compat.text_type(mi) + + # long format + mi = MultiIndex.from_product([list('abcdefg'), range(10)], + names=['first', 'second']) + result = str(mi) + + if PY3: + tm.assert_index_equal(eval(repr(mi)), mi, exact=True) + else: + result = eval(repr(mi)) + # string coerces to unicode + tm.assert_index_equal(result, mi, exact=False) + self.assertEqual( + mi.get_level_values('first').inferred_type, 'string') + self.assertEqual( + result.get_level_values('first').inferred_type, 'unicode') + + mi = 
MultiIndex.from_product( + [list(u'abcdefg'), range(10)], names=['first', 'second']) + result = eval(repr(mi_u)) + tm.assert_index_equal(result, mi_u, exact=True) + + def test_str(self): + # tested elsewhere + pass + + def test_unicode_string_with_unicode(self): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + + if PY3: + str(idx) + else: + compat.text_type(idx) + + def test_bytestring_with_unicode(self): + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + + if PY3: + bytes(idx) + else: + str(idx) + + def test_slice_keep_name(self): + x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], + names=['x', 'y']) + self.assertEqual(x[1:].names, x.names) + + def test_isnull_behavior(self): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isnull(MI) + with tm.assertRaises(NotImplementedError): + pd.isnull(self.index) + + def test_level_setting_resets_attributes(self): + ind = MultiIndex.from_arrays([ + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) + assert ind.is_monotonic + ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], + inplace=True) + # if this fails, probably didn't reset the cache correctly. 
+ assert not ind.is_monotonic + + def test_isin(self): + values = [('foo', 2), ('bar', 3), ('quux', 4)] + + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + result = idx.isin(values) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # empty, return dtype bool + idx = MultiIndex.from_arrays([[], []]) + result = idx.isin(values) + self.assertEqual(len(result), 0) + self.assertEqual(result.dtype, np.bool_) + + def test_isin_nan(self): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + [False, False]) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + [False, False]) + + def test_isin_level_kwarg(self): + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) + + vals_0 = ['foo', 'bar', 'quux'] + vals_1 = [2, 3, 10] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) + + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) + + self.assertRaises(IndexError, idx.isin, vals_0, level=5) + self.assertRaises(IndexError, idx.isin, vals_0, level=-5) + + self.assertRaises(KeyError, idx.isin, vals_0, level=1.0) + self.assertRaises(KeyError, idx.isin, vals_1, level=-1.0) + self.assertRaises(KeyError, idx.isin, vals_1, level='A') + + idx.names = ['A', 'B'] + tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) + tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) + + self.assertRaises(KeyError, idx.isin, vals_1, level='C') + + def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): + # GH6552 + idx = self.index.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = pd.MultiIndex.from_product([[1, 2], 
[3, 4]]) + + # list & ndarray cases + self.assertEqual(idx.reindex([])[0].names, [None, None]) + self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None]) + self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None]) + self.assertEqual(idx.reindex(target.values)[0].names, [None, None]) + self.assertEqual( + idx.reindex(other_dtype.tolist())[0].names, [None, None]) + self.assertEqual( + idx.reindex(other_dtype.values)[0].names, [None, None]) + + idx.names = ['foo', 'bar'] + self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar']) + self.assertEqual( + idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) + self.assertEqual( + idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) + + def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], + names=['foo', 'bar']) + self.assertEqual(idx.reindex([], level=0)[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex([], level=1)[0].names, ['foo', 'bar']) + + def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + self.assertEqual(idx.reindex([], level=0)[0].levels[0].dtype.type, + np.int64) + self.assertEqual(idx.reindex([], level=1)[0].levels[1].dtype.type, + np.object_) + + def test_groupby(self): + groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = self.index.get_values().tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = self.index.groupby(self.index) + exp = dict((key, [key]) for key in self.index) + tm.assert_dict_equal(groups, exp) + + def test_index_name_retained(self): + # GH9857 + result = pd.DataFrame({'x': [1, 2, 6], + 
'y': [2, 2, 8], + 'z': [-5, 0, 5]}) + result = result.set_index('z') + result.loc[10] = [9, 10] + df_expected = pd.DataFrame({'x': [1, 2, 6, 9], + 'y': [2, 2, 8, 10], + 'z': [-5, 0, 5, 10]}) + df_expected = df_expected.set_index('z') + tm.assert_frame_equal(result, df_expected) + + def test_equals_operator(self): + # GH9785 + self.assertTrue((self.index == self.index).all()) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py new file mode 100644 index 0000000000000..d14f7bbc680df --- /dev/null +++ b/pandas/tests/indexes/test_numeric.py @@ -0,0 +1,824 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime +from pandas import compat +from pandas.compat import range, lrange, u, PY3 + +import numpy as np + +from pandas import (date_range, Series, DataFrame, + Index, Float64Index, Int64Index, RangeIndex) +from pandas.util.testing import assertRaisesRegexp + +import pandas.util.testing as tm +import pandas.core.config as cf + +import pandas as pd +from pandas.lib import Timestamp + +from .common import Base + + +class Numeric(Base): + + def test_numeric_compat(self): + + idx = self.create_index() + didx = idx * idx + + result = idx * 1 + tm.assert_index_equal(result, idx) + + result = 1 * idx + tm.assert_index_equal(result, idx) + + # in general not true for RangeIndex + if not isinstance(idx, RangeIndex): + result = idx * idx + tm.assert_index_equal(result, idx ** 2) + + # truediv under PY3 + result = idx / 1 + expected = idx + if PY3: + expected = expected.astype('float64') + tm.assert_index_equal(result, expected) + + result = idx / 2 + if PY3: + expected = expected.astype('float64') + expected = Index(idx.values / 2) + tm.assert_index_equal(result, expected) + + result = idx // 1 + tm.assert_index_equal(result, idx) + + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, idx * 5) + + result = idx * np.arange(5, dtype='int64') + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, 
dtype='int64')) + tm.assert_index_equal(result, didx) + + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + expected = Float64Index(np.arange(5, dtype='float64') * + (np.arange(5, dtype='float64') + 0.1)) + tm.assert_index_equal(result, expected) + + # invalid + self.assertRaises(TypeError, + lambda: idx * date_range('20130101', periods=5)) + self.assertRaises(ValueError, lambda: idx * idx[0:3]) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) + + def test_explicit_conversions(self): + + # GH 8608 + # add/sub are overriden explicity for Float/Int Index + idx = self._holder(np.arange(5, dtype='int64')) + + # float conversions + arr = np.arange(5, dtype='int64') * 3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx, expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx, expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5, dtype='float64') + result = fidx - a + tm.assert_index_equal(result, expected) + + expected = Float64Index(-arr) + a = np.zeros(5, dtype='float64') + result = a - fidx + tm.assert_index_equal(result, expected) + + def test_ufunc_compat(self): + idx = self._holder(np.arange(5, dtype='int64')) + result = np.sin(idx) + expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) + tm.assert_index_equal(result, expected) + + def test_index_groupby(self): + int_idx = Index(range(6)) + float_idx = Index(np.arange(0, 0.6, 0.1)) + obj_idx = Index('A B C D E F'.split()) + dt_idx = pd.date_range('2013-01-01', freq='M', periods=6) + + for idx in [int_idx, float_idx, obj_idx, dt_idx]: + to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1]) + self.assertEqual(idx.groupby(to_groupby), + {1.0: [idx[0], idx[5]], 2.0: [idx[1], idx[4]]}) + + to_groupby = Index([datetime(2011, 11, 1), + datetime(2011, 12, 1), + pd.NaT, + pd.NaT, + datetime(2011, 12, 1), + datetime(2011, 11, 1)], + tz='UTC').values + + ex_keys = pd.tslib.datetime_to_datetime64(np.array([Timestamp( + 
'2011-11-01'), Timestamp('2011-12-01')])) + expected = {ex_keys[0][0]: [idx[0], idx[5]], + ex_keys[0][1]: [idx[1], idx[4]]} + self.assertEqual(idx.groupby(to_groupby), expected) + + def test_modulo(self): + # GH 9244 + index = self.create_index() + expected = Index(index.values % 2) + self.assert_index_equal(index % 2, expected) + + +class TestFloat64Index(Numeric, tm.TestCase): + _holder = Float64Index + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), + float=Float64Index(np.arange(5) * 2.5)) + self.setup_indices() + + def create_index(self): + return Float64Index(np.arange(5, dtype='float64')) + + def test_repr_roundtrip(self): + for ind in (self.mixed, self.float): + tm.assert_index_equal(eval(repr(ind)), ind) + + def check_is_index(self, i): + self.assertIsInstance(i, Index) + self.assertNotIsInstance(i, Float64Index) + + def check_coerce(self, a, b, is_float_index=True): + self.assertTrue(a.equals(b)) + if is_float_index: + self.assertIsInstance(b, Float64Index) + else: + self.check_is_index(b) + + def test_constructor(self): + + # explicit construction + index = Float64Index([1, 2, 3, 4, 5]) + self.assertIsInstance(index, Float64Index) + self.assertTrue((index.values == np.array( + [1, 2, 3, 4, 5], dtype='float64')).all()) + index = Float64Index(np.array([1, 2, 3, 4, 5])) + self.assertIsInstance(index, Float64Index) + index = Float64Index([1., 2, 3, 4, 5]) + self.assertIsInstance(index, Float64Index) + index = Float64Index(np.array([1., 2, 3, 4, 5])) + self.assertIsInstance(index, Float64Index) + self.assertEqual(index.dtype, float) + + index = Float64Index(np.array([1., 2, 3, 4, 5]), dtype=np.float32) + self.assertIsInstance(index, Float64Index) + self.assertEqual(index.dtype, np.float64) + + index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + self.assertIsInstance(index, Float64Index) + self.assertEqual(index.dtype, np.float64) + + # nan handling + result = 
Float64Index([np.nan, np.nan]) + self.assertTrue(pd.isnull(result.values).all()) + result = Float64Index(np.array([np.nan])) + self.assertTrue(pd.isnull(result.values).all()) + result = Index(np.array([np.nan])) + self.assertTrue(pd.isnull(result.values).all()) + + def test_constructor_invalid(self): + + # invalid + self.assertRaises(TypeError, Float64Index, 0.) + self.assertRaises(TypeError, Float64Index, ['a', 'b', 0.]) + self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')]) + + def test_constructor_coerce(self): + + self.check_coerce(self.mixed, Index([1.5, 2, 3, 4, 5])) + self.check_coerce(self.float, Index(np.arange(5) * 2.5)) + self.check_coerce(self.float, Index(np.array( + np.arange(5) * 2.5, dtype=object))) + + def test_constructor_explicit(self): + + # these don't auto convert + self.check_coerce(self.float, + Index((np.arange(5) * 2.5), dtype=object), + is_float_index=False) + self.check_coerce(self.mixed, Index( + [1.5, 2, 3, 4, 5], dtype=object), is_float_index=False) + + def test_astype(self): + + result = self.float.astype(object) + self.assertTrue(result.equals(self.float)) + self.assertTrue(self.float.equals(result)) + self.check_is_index(result) + + i = self.mixed.copy() + i.name = 'foo' + result = i.astype(object) + self.assertTrue(result.equals(i)) + self.assertTrue(i.equals(result)) + self.check_is_index(result) + + def test_equals(self): + + i = Float64Index([1.0, 2.0]) + self.assertTrue(i.equals(i)) + self.assertTrue(i.identical(i)) + + i2 = Float64Index([1.0, 2.0]) + self.assertTrue(i.equals(i2)) + + i = Float64Index([1.0, np.nan]) + self.assertTrue(i.equals(i)) + self.assertTrue(i.identical(i)) + + i2 = Float64Index([1.0, np.nan]) + self.assertTrue(i.equals(i2)) + + def test_get_indexer(self): + idx = Float64Index([0.0, 1.0, 2.0]) + tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) + + target = [-0.1, 0.5, 1.1] + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) + tm.assert_numpy_array_equal( + 
idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) + + def test_get_loc(self): + idx = Float64Index([0.0, 1.0, 2.0]) + for method in [None, 'pad', 'backfill', 'nearest']: + self.assertEqual(idx.get_loc(1, method), 1) + if method is not None: + self.assertEqual(idx.get_loc(1, method, tolerance=0), 1) + + for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: + self.assertEqual(idx.get_loc(1.1, method), loc) + self.assertEqual(idx.get_loc(1.1, method, tolerance=0.9), loc) + + self.assertRaises(KeyError, idx.get_loc, 'foo') + self.assertRaises(KeyError, idx.get_loc, 1.5) + self.assertRaises(KeyError, idx.get_loc, 1.5, method='pad', + tolerance=0.1) + + with tm.assertRaisesRegexp(ValueError, 'must be numeric'): + idx.get_loc(1.4, method='nearest', tolerance='foo') + + def test_get_loc_na(self): + idx = Float64Index([np.nan, 1, 2]) + self.assertEqual(idx.get_loc(1), 1) + self.assertEqual(idx.get_loc(np.nan), 0) + + idx = Float64Index([np.nan, 1, np.nan]) + self.assertEqual(idx.get_loc(1), 1) + + # representable by slice [0:2:2] + # self.assertRaises(KeyError, idx.slice_locs, np.nan) + sliced = idx.slice_locs(np.nan) + self.assertTrue(isinstance(sliced, tuple)) + self.assertEqual(sliced, (0, 3)) + + # not representable by slice + idx = Float64Index([np.nan, 1, np.nan, np.nan]) + self.assertEqual(idx.get_loc(1), 1) + self.assertRaises(KeyError, idx.slice_locs, np.nan) + + def test_contains_nans(self): + i = Float64Index([1.0, 2.0, np.nan]) + self.assertTrue(np.nan in i) + + def test_contains_not_nans(self): + i = Float64Index([1.0, 2.0, np.nan]) + self.assertTrue(1.0 in i) + + def test_doesnt_contain_all_the_things(self): + i = Float64Index([np.nan]) + self.assertFalse(i.isin([0]).item()) + self.assertFalse(i.isin([1]).item()) + self.assertTrue(i.isin([np.nan]).item()) + + def test_nan_multiple_containment(self): + i = Float64Index([1.0, np.nan]) + 
tm.assert_numpy_array_equal(i.isin([1.0]), np.array([True, False])) + tm.assert_numpy_array_equal(i.isin([2.0, np.pi]), + np.array([False, False])) + tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, True])) + tm.assert_numpy_array_equal(i.isin([1.0, np.nan]), + np.array([True, True])) + i = Float64Index([1.0, 2.0]) + tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, False])) + + def test_astype_from_object(self): + index = Index([1.0, np.nan, 0.2], dtype='object') + result = index.astype(float) + expected = Float64Index([1.0, np.nan, 0.2]) + tm.assert_equal(result.dtype, expected.dtype) + tm.assert_index_equal(result, expected) + + def test_fillna_float64(self): + # GH 11343 + idx = Index([1.0, np.nan, 3.0], dtype=float, name='x') + # can't downcast + exp = Index([1.0, 0.1, 3.0], name='x') + self.assert_index_equal(idx.fillna(0.1), exp) + + # downcast + exp = Float64Index([1.0, 2.0, 3.0], name='x') + self.assert_index_equal(idx.fillna(2), exp) + + # object + exp = Index([1.0, 'obj', 3.0], name='x') + self.assert_index_equal(idx.fillna('obj'), exp) + + +class TestInt64Index(Numeric, tm.TestCase): + _holder = Int64Index + _multiprocess_can_split_ = True + + def setUp(self): + self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) + self.setup_indices() + + def create_index(self): + return Int64Index(np.arange(5, dtype='int64')) + + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + + assertRaisesRegexp(ValueError, "^Length", testit) + + def test_constructor(self): + # pass list, coerce fine + index = Int64Index([-5, 0, 1, 2]) + expected = np.array([-5, 0, 1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(index, expected) + + # from iterable + index = Int64Index(iter([-5, 0, 1, 2])) + tm.assert_numpy_array_equal(index, expected) + + # scalar raise Exception + self.assertRaises(TypeError, Int64Index, 5) + + # copy + arr = self.index.values + new_index = Int64Index(arr, copy=True) + 
tm.assert_numpy_array_equal(new_index, self.index) + val = arr[0] + 3000 + # this should not change index + arr[0] = val + self.assertNotEqual(new_index[0], val) + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = Int64Index(arr) + self.assertEqual(index.values.dtype, np.int64) + self.assertTrue(index.equals(arr)) + + # preventing casting + arr = np.array([1, '2', 3, '4'], dtype=object) + with tm.assertRaisesRegexp(TypeError, 'casting'): + Int64Index(arr) + + arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] + with tm.assertRaisesRegexp(TypeError, 'casting'): + Int64Index(arr_with_floats) + + def test_copy(self): + i = Int64Index([], name='Foo') + i_copy = i.copy() + self.assertEqual(i_copy.name, 'Foo') + + def test_view(self): + super(TestInt64Index, self).test_view() + + i = Int64Index([], name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view('i8') + tm.assert_index_equal(i, Int64Index(i_view, name='Foo')) + + i_view = i.view(Int64Index) + tm.assert_index_equal(i, Int64Index(i_view, name='Foo')) + + def test_coerce_list(self): + # coerce things + arr = Index([1, 2, 3, 4]) + tm.assertIsInstance(arr, Int64Index) + + # but not if explicit dtype passed + arr = Index([1, 2, 3, 4], dtype=object) + tm.assertIsInstance(arr, Index) + + def test_dtype(self): + self.assertEqual(self.index.dtype, np.int64) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = Int64Index([4, 3, 2, 1]) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = Int64Index([1]) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_is_monotonic_na(self): + examples = [Index([np.nan]), + Index([np.nan, 1]), + Index([1, 2, np.nan]), + Index(['a', 
'b', np.nan]), + pd.to_datetime(['NaT']), + pd.to_datetime(['NaT', '2000-01-01']), + pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), + pd.to_timedelta(['1 day', 'NaT']), ] + for index in examples: + self.assertFalse(index.is_monotonic_increasing) + self.assertFalse(index.is_monotonic_decreasing) + + def test_equals(self): + same_values = Index(self.index, dtype=object) + self.assertTrue(self.index.equals(same_values)) + self.assertTrue(same_values.equals(self.index)) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(i)) + + self.assertFalse(i.identical(self.index)) + self.assertTrue(Index(same_values, name='foo', dtype=object).identical( + i)) + + self.assertFalse(self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) + + def test_get_indexer(self): + target = Int64Index(np.arange(10)) + indexer = self.index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + target = Int64Index(np.arange(10)) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + target = Int64Index(np.arange(10)) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + tm.assert_numpy_array_equal(indexer, expected) + + def test_join_outer(self): + other = Int64Index([7, 12, 25, 1, 2, 5]) + 
other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], + dtype=np.int64) + eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], + dtype=np.int64) + + tm.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='outer', + return_indexers=True) + noidx_res = self.index.join(other_mono, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], + dtype=np.int64) + tm.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([2, 12]) + elidx = np.array([1, 6]) + eridx = np.array([4, 1]) + + tm.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='inner', + return_indexers=True) + + res2 = self.index.intersection(other_mono) + self.assertTrue(res.equals(res2)) + + eridx = np.array([1, 4]) + tm.assertIsInstance(res, 
Int64Index) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], + dtype=np.int64) + + tm.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='left', + return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], + dtype=np.int64) + tm.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eridx = np.array([0, 1, 2, 3, -1, -1]) + elidx = np.array([0, 0, 1, 2, 3, 4]) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int64) + + tm.assertIsInstance(other, Int64Index) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + # monotonic + res, lidx, ridx = self.index.join(other_mono, how='right', + return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.int64) + tm.assertIsInstance(other, 
Int64Index) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + elidx = np.array([0, 1, 2, 3, -1, -1]) + eridx = np.array([0, 0, 1, 2, 3, 4]) + self.assertTrue(res.equals(eres)) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self): + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, + 16, 18], dtype=object) + self.assertTrue(outer.equals(outer2)) + self.assertTrue(outer.equals(expected)) + + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index([6, 8, 10], dtype=object) + self.assertTrue(inner.equals(inner2)) + self.assertTrue(inner.equals(expected)) + + left = self.index.join(other, how='left') + self.assertTrue(left.equals(self.index)) + + left2 = other.join(self.index, how='left') + self.assertTrue(left2.equals(other)) + + right = self.index.join(other, how='right') + self.assertTrue(right.equals(other)) + + right2 = other.join(self.index, how='right') + self.assertTrue(right2.equals(self.index)) + + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) + self.assertTrue(joined.equals(exp_joined)) + + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.int64) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.int64) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + 
joined = self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_intersection(self): + other = Index([1, 2, 3, 4, 5]) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + tm.assert_numpy_array_equal(result, expected) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + tm.assert_numpy_array_equal(result, expected) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + tm.assert_numpy_array_equal(result, expected) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + tm.assert_numpy_array_equal(result, expected) + + def test_cant_or_shouldnt_cast(self): + # can't + data = ['foo', 'bar', 'baz'] + self.assertRaises(TypeError, Int64Index, data) + + # shouldn't + data = ['0', '1', '2'] + self.assertRaises(TypeError, Int64Index, data) + + def test_view_Index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = Int64Index([1, 2, 3, 4], name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_int_name_format(self): + index = Index(['a', 'b', 'c'], name=0) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) + repr(s) + repr(df) + + def test_print_unicode_columns(self): + df = pd.DataFrame({u("\u05d0"): [1, 
2, 3], + "\u05d1": [4, 5, 6], + "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_repr_summary(self): + with cf.option_context('display.max_seq_items', 10): + r = repr(pd.Index(np.arange(1000))) + self.assertTrue(len(r) < 200) + self.assertTrue("..." in r) + + def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_unicode_string_with_unicode(self): + idx = Index(lrange(1000)) + + if PY3: + str(idx) + else: + compat.text_type(idx) + + def test_bytestring_with_unicode(self): + idx = Index(lrange(1000)) + if PY3: + bytes(idx) + else: + str(idx) + + def test_slice_keep_name(self): + idx = Int64Index([1, 2], name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_ufunc_coercions(self): + idx = Int64Index([1, 2, 3, 4, 5], name='x') + + result = np.sqrt(idx) + tm.assertIsInstance(result, Float64Index) + exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') + tm.assert_index_equal(result, exp) + + result = np.divide(idx, 2.) + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + tm.assert_index_equal(result, exp) + + # _evaluate_numeric_binop + result = idx + 2. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([3., 4., 5., 6., 7.], name='x') + tm.assert_index_equal(result, exp) + + result = idx - 2. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([-1., 0., 1., 2., 3.], name='x') + tm.assert_index_equal(result, exp) + + result = idx * 1. + tm.assertIsInstance(result, Float64Index) + exp = Float64Index([1., 2., 3., 4., 5.], name='x') + tm.assert_index_equal(result, exp) + + result = idx / 2. 
+ tm.assertIsInstance(result, Float64Index) + exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + tm.assert_index_equal(result, exp) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py new file mode 100644 index 0000000000000..cf7fe67be6401 --- /dev/null +++ b/pandas/tests/indexes/test_range.py @@ -0,0 +1,806 @@ +# -*- coding: utf-8 -*- + +from datetime import datetime +from itertools import combinations +import operator + +from pandas.compat import range, u, PY3 + +import numpy as np + +from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex) +from pandas.util.testing import assertRaisesRegexp + +import pandas.util.testing as tm + +import pandas as pd + +from .test_numeric import Numeric + + +class TestRangeIndex(Numeric, tm.TestCase): + _holder = RangeIndex + _compat_props = ['shape', 'ndim', 'size', 'itemsize'] + + def setUp(self): + self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + self.setup_indices() + + def create_index(self): + return RangeIndex(5) + + def test_binops(self): + ops = [operator.add, operator.sub, operator.mul, operator.floordiv, + operator.truediv, pow] + scalars = [-1, 1, 2] + idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), RangeIndex(5, -5, -1)] + for op in ops: + for a, b in combinations(idxs, 2): + result = op(a, b) + expected = op(Int64Index(a), Int64Index(b)) + tm.assert_index_equal(result, expected) + for idx in idxs: + for scalar in scalars: + result = op(idx, scalar) + expected = op(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected) + + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + + assertRaisesRegexp(ValueError, "^Length", testit) + + def test_constructor(self): + index = RangeIndex(5) + expected = np.arange(5, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._start, 0) + self.assertEqual(index._stop, 5) + self.assertEqual(index._step, 1) + 
self.assertEqual(index.name, None) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex(1, 5) + expected = np.arange(1, 5, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._start, 1) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex(1, 5, 2) + expected = np.arange(1, 5, 2, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._step, 2) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex() + expected = np.empty(0, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._start, 0) + self.assertEqual(index._stop, 0) + self.assertEqual(index._step, 1) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex(name='Foo') + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index.name, 'Foo') + + # we don't allow on a bare Index + self.assertRaises(TypeError, lambda: Index(0, 1000)) + + # invalid args + for i in [Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']), + [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10)]: + self.assertRaises(TypeError, lambda: RangeIndex(i)) + + def test_constructor_same(self): + + # pass thru w and w/o copy + index = RangeIndex(1, 5, 2) + result = RangeIndex(index, copy=False) + self.assertTrue(result.identical(index)) + + result = RangeIndex(index, copy=True) + self.assertTrue(result.equals(index)) + + result = RangeIndex(index) + self.assertTrue(result.equals(index)) + + self.assertRaises(TypeError, + lambda: RangeIndex(index, dtype='float64')) + + def test_constructor_range(self): + + self.assertRaises(TypeError, lambda: RangeIndex(range(1, 5, 2))) + + result = RangeIndex.from_range(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + self.assertTrue(result.equals(expected)) + + result = RangeIndex.from_range(range(5, 6)) + expected = RangeIndex(5, 6, 1) + self.assertTrue(result.equals(expected)) + + # an invalid range + result = 
RangeIndex.from_range(range(5, 1)) + expected = RangeIndex(0, 0, 1) + self.assertTrue(result.equals(expected)) + + result = RangeIndex.from_range(range(5)) + expected = RangeIndex(0, 5, 1) + self.assertTrue(result.equals(expected)) + + result = Index(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + self.assertTrue(result.equals(expected)) + + self.assertRaises(TypeError, + lambda: Index(range(1, 5, 2), dtype='float64')) + + def test_numeric_compat2(self): + # validate that we are handling the RangeIndex overrides to numeric ops + # and returning RangeIndex where possible + + idx = RangeIndex(0, 10, 2) + + result = idx * 2 + expected = RangeIndex(0, 20, 4) + self.assertTrue(result.equals(expected)) + + result = idx + 2 + expected = RangeIndex(2, 12, 2) + self.assertTrue(result.equals(expected)) + + result = idx - 2 + expected = RangeIndex(-2, 8, 2) + self.assertTrue(result.equals(expected)) + + # truediv under PY3 + result = idx / 2 + if PY3: + expected = RangeIndex(0, 5, 1) + else: + expected = RangeIndex(0, 5, 1).astype('float64') + self.assertTrue(result.equals(expected)) + + result = idx / 4 + expected = RangeIndex(0, 10, 2).values / 4 + self.assertTrue(result.equals(expected)) + + result = idx // 1 + expected = idx + tm.assert_index_equal(result, expected, exact=True) + + # __mul__ + result = idx * idx + expected = Index(idx.values * idx.values) + tm.assert_index_equal(result, expected, exact=True) + + # __pow__ + idx = RangeIndex(0, 1000, 2) + result = idx ** 2 + expected = idx._int64index ** 2 + tm.assert_index_equal(Index(result.values), expected, exact=True) + + # __floordiv__ + cases_exact = [(RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)), + (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)), + (RangeIndex(0, 1000, 1), 2, + RangeIndex(0, 1000, 1)._int64index // 2), + (RangeIndex(0, 100, 1), 2.0, + RangeIndex(0, 100, 1)._int64index // 2.0), + (RangeIndex(), 50, RangeIndex()), + (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)), + (RangeIndex(-5, -10, 
-6), 4, RangeIndex(-2, -1, 1)), + (RangeIndex(-100, -200, 3), 2, RangeIndex())] + for idx, div, expected in cases_exact: + tm.assert_index_equal(idx // div, expected, exact=True) + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + self.assertEqual(index.values.dtype, np.int64) + self.assertTrue(index.equals(arr)) + + # non-int raise Exception + self.assertRaises(TypeError, RangeIndex, '1', '10', '1') + self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) + + # invalid passed type + self.assertRaises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) + + def test_copy(self): + i = RangeIndex(5, name='Foo') + i_copy = i.copy() + self.assertTrue(i_copy is not i) + self.assertTrue(i_copy.identical(i)) + self.assertEqual(i_copy._start, 0) + self.assertEqual(i_copy._stop, 5) + self.assertEqual(i_copy._step, 1) + self.assertEqual(i_copy.name, 'Foo') + + def test_repr(self): + i = RangeIndex(5, name='Foo') + result = repr(i) + if PY3: + expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" + else: + expected = "RangeIndex(start=0, stop=5, step=1, name=u'Foo')" + self.assertTrue(result, expected) + + result = eval(result) + self.assertTrue(result.equals(i)) + + i = RangeIndex(5, 0, -1) + result = repr(i) + expected = "RangeIndex(start=5, stop=0, step=-1)" + self.assertEqual(result, expected) + + result = eval(result) + self.assertTrue(result.equals(i)) + + def test_insert(self): + + idx = RangeIndex(5, name='Foo') + result = idx[1:4] + + # test 0th element + self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) + + def test_delete(self): + + idx = RangeIndex(5, name='Foo') + expected = idx[1:].astype(int) + result = idx.delete(0) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + expected = idx[:-1].astype(int) + result = idx.delete(-1) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + with 
tm.assertRaises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_view(self): + super(TestRangeIndex, self).test_view() + + i = RangeIndex(name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view('i8') + tm.assert_numpy_array_equal(i, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_dtype(self): + self.assertEqual(self.index.dtype, np.int64) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = RangeIndex(4, 0, -1) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = RangeIndex(1, 2) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_equals(self): + equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), + (RangeIndex(0), RangeIndex(1, -1, 3)), + (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), + (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))] + for left, right in equiv_pairs: + self.assertTrue(left.equals(right)) + self.assertTrue(right.equals(left)) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + # we don't allow object dtype for RangeIndex + if isinstance(self.index, RangeIndex): + return + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(self.index.copy(dtype=object))) + + self.assertFalse(i.identical(self.index)) + 
self.assertTrue(Index(same_values, name='foo', dtype=object).identical( + i)) + + self.assertFalse(self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) + + def test_get_indexer(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + self.assert_numpy_array_equal(indexer, expected) + + def test_join_outer(self): + # join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25]) + elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, + -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, + 5, 4, 3, 2, 1, 0], dtype=np.int64) + + self.assertIsInstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + # join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + self.assertIsInstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + 
self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + # Join with non-RangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9]) + eridx = np.array([9, 7]) + + self.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + # Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + self.assertIsInstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + # Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], + dtype=np.int64) + + self.assertIsInstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + # Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + + self.assertIsInstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + # Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, 
-1, -1, 9, -1, 8, -1], + dtype=np.int64) + + self.assertIsInstance(other, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + # Join withRangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + + self.assertIsInstance(other, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + def test_join_non_int_index(self): + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, + 16, 18], dtype=object) + self.assertTrue(outer.equals(outer2)) + self.assertTrue(outer.equals(expected)) + + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index([6, 8, 10], dtype=object) + self.assertTrue(inner.equals(inner2)) + self.assertTrue(inner.equals(expected)) + + left = self.index.join(other, how='left') + self.assertTrue(left.equals(self.index)) + + left2 = other.join(self.index, how='left') + self.assertTrue(left2.equals(other)) + + right = self.index.join(other, how='right') + self.assertTrue(right.equals(other)) + + right2 = other.join(self.index, how='right') + self.assertTrue(right2.equals(self.index)) + + def test_join_non_unique(self): + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = self.index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], + dtype=np.int64) + + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined 
= self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_intersection(self): + # intersect with Int64Index + other = Index(np.arange(1, 6)) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + self.assert_numpy_array_equal(result, expected) + + # intersect with increasing RangeIndex + other = RangeIndex(1, 6) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + # intersect with decreasing RangeIndex + other = RangeIndex(5, 0, -1) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + self.assert_numpy_array_equal(result, expected) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + self.assert_numpy_array_equal(result, expected) + + def test_union(self): + RI = RangeIndex + I64 = Int64Index + cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)), + (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)), + (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)), + (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)), + (RI(0, -10, -1), RI(-10, -20, -1), 
RI(-19, 1, 1)), + (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)), + (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)), + (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)), + (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)), + (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)), + (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)), + (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)), + (RI(), RI(), RI()), + (RI(0, -10, -2), RI(), RI(0, -10, -2)), + (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)), + (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)), + (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)), + (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)), + (RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)), + (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])), + (RI(0, 10, 1), I64([]), RI(0, 10, 1)), + (RI(), I64([1, 5, 6]), I64([1, 5, 6]))] + for idx1, idx2, expected in cases: + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + res3 = idx1._int64index.union(idx2) + tm.assert_index_equal(res1, expected, exact=True) + tm.assert_index_equal(res2, expected, exact=True) + tm.assert_index_equal(res3, expected) + + def test_nbytes(self): + + # memory savings vs int index + i = RangeIndex(0, 1000) + self.assertTrue(i.nbytes < i.astype(int).nbytes / 10) + + # constant memory usage + i2 = RangeIndex(0, 10) + self.assertEqual(i.nbytes, i2.nbytes) + + def test_cant_or_shouldnt_cast(self): + # can't + self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + + # shouldn't + self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + + def test_view_Index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_print_unicode_columns(self): + df = pd.DataFrame({u("\u05d0"): [1, 2, 3], + "\u05d1": [4, 5, 6], + "c": [7, 8, 9]}) + repr(df.columns) # should not 
raise UnicodeDecodeError + + def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_explicit_conversions(self): + + # GH 8608 + # add/sub are overriden explicity for Float/Int Index + idx = RangeIndex(5) + + # float conversions + arr = np.arange(5, dtype='int64') * 3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx, expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx, expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5, dtype='float64') + result = fidx - a + tm.assert_index_equal(result, expected) + + expected = Float64Index(-arr) + a = np.zeros(5, dtype='float64') + result = a - fidx + tm.assert_index_equal(result, expected) + + def test_duplicates(self): + for ind in self.indices: + if not len(ind): + continue + idx = self.indices[ind] + self.assertTrue(idx.is_unique) + self.assertFalse(idx.has_duplicates) + + def test_ufunc_compat(self): + idx = RangeIndex(5) + result = np.sin(idx) + expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) + tm.assert_index_equal(result, expected) + + def test_extended_gcd(self): + result = self.index._extended_gcd(6, 10) + self.assertEqual(result[0], result[1] * 6 + result[2] * 10) + self.assertEqual(2, result[0]) + + result = self.index._extended_gcd(10, 6) + self.assertEqual(2, result[1] * 10 + result[2] * 6) + self.assertEqual(2, result[0]) + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(1, 6)._min_fitting_element(1) + self.assertEqual(1, result) + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + self.assertEqual(1, result) + + big_num = 500000000000000000000000 + + result = 
RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) + self.assertEqual(big_num, result) + + def test_max_fitting_element(self): + result = RangeIndex(0, 20, 2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(1, 6)._max_fitting_element(4) + self.assertEqual(4, result) + + result = RangeIndex(18, -2, -2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(5, 0, -1)._max_fitting_element(4) + self.assertEqual(4, result) + + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num) + self.assertEqual(big_num, result) + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self): + + # scalar indexing + res = self.index[1] + expected = 2 + self.assertEqual(res, expected) + + res = self.index[-1] + expected = 18 + self.assertEqual(res, expected) + + # slicing + # slice value completion + index = self.index[:] + expected = self.index + self.assert_numpy_array_equal(index, expected) + + # positive slice values + index = self.index[7:10:2] + expected = np.array([14, 18]) + self.assert_numpy_array_equal(index, expected) + + # negative slice values + index = self.index[-1:-5:-2] + expected = np.array([18, 14]) + self.assert_numpy_array_equal(index, expected) + + # stop overshoot + index = self.index[2:100:4] + expected = np.array([4, 12]) + self.assert_numpy_array_equal(index, expected) + + # reverse + index = self.index[::-1] + expected = self.index.values[::-1] + self.assert_numpy_array_equal(index, expected) + + index = self.index[-8::-1] + expected = np.array([4, 2, 0]) + self.assert_numpy_array_equal(index, expected) + + index = self.index[-40::-1] + expected = np.array([]) + self.assert_numpy_array_equal(index, expected) + + index = self.index[40::-1] + expected = self.index.values[40::-1] + self.assert_numpy_array_equal(index, expected) + + index = self.index[10::-1] + expected = 
self.index.values[::-1] + self.assert_numpy_array_equal(index, expected) + + def test_len_specialised(self): + + # make sure that our len is the same as + # np.arange calc + + for step in np.arange(1, 6, 1): + + arr = np.arange(0, 5, step) + i = RangeIndex(0, 5, step) + self.assertEqual(len(i), len(arr)) + + i = RangeIndex(5, 0, step) + self.assertEqual(len(i), 0) + + for step in np.arange(-6, -1, 1): + + arr = np.arange(5, 0, step) + i = RangeIndex(5, 0, step) + self.assertEqual(len(i), len(arr)) + + i = RangeIndex(0, 5, step) + self.assertEqual(len(i), 0) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py deleted file mode 100644 index af42c2751bf46..0000000000000 --- a/pandas/tests/test_index.py +++ /dev/null @@ -1,7146 +0,0 @@ -# -*- coding: utf-8 -*- -# pylint: disable=E1101,E1103,W0232 - -# TODO(wesm): fix long line flake8 issues -# flake8: noqa - -from datetime import datetime, timedelta, time -from pandas import compat -from pandas.compat import (long, is_platform_windows, range, lrange, lzip, u, - zip, PY3) -from itertools import combinations -import operator -import re -import nose -import warnings -import os - -import numpy as np - -from pandas import (period_range, date_range, Categorical, Series, DataFrame, - Index, Float64Index, Int64Index, RangeIndex, MultiIndex, - CategoricalIndex, DatetimeIndex, TimedeltaIndex, - PeriodIndex) -from pandas.core.index import InvalidIndexError -from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, - assert_copy) - -import pandas.util.testing as tm -import pandas.core.config as cf - -from pandas.tseries.index import _to_m8 - -import pandas as pd -from pandas.lib import Timestamp -from itertools import product - -if PY3: - unicode = lambda x: x - - -class Base(object): - """ base class for index sub-class tests """ - _holder = None - _compat_props = ['shape', 'ndim', 'size', 'itemsize', 'nbytes'] - - def setup_indices(self): - # setup the test indices in the self.indicies dict - for 
name, ind in self.indices.items(): - setattr(self, name, ind) - - def verify_pickle(self, index): - unpickled = self.round_trip_pickle(index) - self.assertTrue(index.equals(unpickled)) - - def test_pickle_compat_construction(self): - # this is testing for pickle compat - if self._holder is None: - return - - # need an object to create with - self.assertRaises(TypeError, self._holder) - - def test_shift(self): - - # GH8083 test the base class for shift - idx = self.create_index() - self.assertRaises(NotImplementedError, idx.shift, 1) - self.assertRaises(NotImplementedError, idx.shift, 1, 2) - - def test_create_index_existing_name(self): - - # GH11193, when an existing index is passed, and a new name is not - # specified, the new index should inherit the previous object name - expected = self.create_index() - if not isinstance(expected, MultiIndex): - expected.name = 'foo' - result = pd.Index(expected) - tm.assert_index_equal(result, expected) - - result = pd.Index(expected, name='bar') - expected.name = 'bar' - tm.assert_index_equal(result, expected) - else: - expected.names = ['foo', 'bar'] - result = pd.Index(expected) - tm.assert_index_equal( - result, Index(Index([('foo', 'one'), ('foo', 'two'), - ('bar', 'one'), ('baz', 'two'), - ('qux', 'one'), ('qux', 'two')], - dtype='object'), - names=['foo', 'bar'])) - - result = pd.Index(expected, names=['A', 'B']) - tm.assert_index_equal( - result, - Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], - dtype='object'), names=['A', 'B'])) - - def test_numeric_compat(self): - - idx = self.create_index() - tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda: idx * 1) - tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda: 1 * idx) - - div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" - tm.assertRaisesRegexp(TypeError, div_err, lambda: idx / 1) - tm.assertRaisesRegexp(TypeError, div_err, lambda: 1 / idx) 
- tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda: idx // 1) - tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda: 1 // idx) - - def test_logical_compat(self): - idx = self.create_index() - tm.assertRaisesRegexp(TypeError, 'cannot perform all', - lambda: idx.all()) - tm.assertRaisesRegexp(TypeError, 'cannot perform any', - lambda: idx.any()) - - def test_boolean_context_compat(self): - - # boolean context compat - idx = self.create_index() - - def f(): - if idx: - pass - - tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) - - def test_reindex_base(self): - idx = self.create_index() - expected = np.arange(idx.size) - - actual = idx.get_indexer(idx) - tm.assert_numpy_array_equal(expected, actual) - - with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') - - def test_ndarray_compat_properties(self): - - idx = self.create_index() - self.assertTrue(idx.T.equals(idx)) - self.assertTrue(idx.transpose().equals(idx)) - - values = idx.values - for prop in self._compat_props: - self.assertEqual(getattr(idx, prop), getattr(values, prop)) - - # test for validity - idx.nbytes - idx.values.nbytes - - def test_repr_roundtrip(self): - - idx = self.create_index() - tm.assert_index_equal(eval(repr(idx)), idx) - - def test_str(self): - - # test the string repr - idx = self.create_index() - idx.name = 'foo' - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) - - def test_dtype_str(self): - for idx in self.indices.values(): - dtype = idx.dtype_str - self.assertIsInstance(dtype, compat.string_types) - if isinstance(idx, PeriodIndex): - self.assertEqual(dtype, 'period') - else: - self.assertEqual(dtype, str(idx.dtype)) - - def test_repr_max_seq_item_setting(self): - # GH10182 - idx = self.create_index() - idx = idx.repeat(50) - with pd.option_context("display.max_seq_items", None): - repr(idx) - self.assertFalse('...' 
in str(idx)) - - def test_wrong_number_names(self): - def testit(ind): - ind.names = ["apple", "banana", "carrot"] - - for ind in self.indices.values(): - assertRaisesRegexp(ValueError, "^Length", testit, ind) - - def test_set_name_methods(self): - new_name = "This is the new name for this index" - for ind in self.indices.values(): - - # don't tests a MultiIndex here (as its tested separated) - if isinstance(ind, MultiIndex): - continue - - original_name = ind.name - new_ind = ind.set_names([new_name]) - self.assertEqual(new_ind.name, new_name) - self.assertEqual(ind.name, original_name) - res = ind.rename(new_name, inplace=True) - - # should return None - self.assertIsNone(res) - self.assertEqual(ind.name, new_name) - self.assertEqual(ind.names, [new_name]) - # with assertRaisesRegexp(TypeError, "list-like"): - # # should still fail even if it would be the right length - # ind.set_names("a") - with assertRaisesRegexp(ValueError, "Level must be None"): - ind.set_names("a", level=0) - - # rename in place just leaves tuples and other containers alone - name = ('A', 'B') - ind.rename(name, inplace=True) - self.assertEqual(ind.name, name) - self.assertEqual(ind.names, [name]) - - def test_hash_error(self): - for ind in self.indices.values(): - with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % - type(ind).__name__): - hash(ind) - - def test_copy_and_deepcopy(self): - from copy import copy, deepcopy - - for ind in self.indices.values(): - - # don't tests a MultiIndex here (as its tested separated) - if isinstance(ind, MultiIndex): - continue - - for func in (copy, deepcopy): - idx_copy = func(ind) - self.assertIsNot(idx_copy, ind) - self.assertTrue(idx_copy.equals(ind)) - - new_copy = ind.copy(deep=True, name="banana") - self.assertEqual(new_copy.name, "banana") - - def test_duplicates(self): - for ind in self.indices.values(): - - if not len(ind): - continue - if isinstance(ind, MultiIndex): - continue - idx = self._holder([ind[0]] * 5) - 
self.assertFalse(idx.is_unique) - self.assertTrue(idx.has_duplicates) - - # GH 10115 - # preserve names - idx.name = 'foo' - result = idx.drop_duplicates() - self.assertEqual(result.name, 'foo') - self.assert_index_equal(result, Index([ind[0]], name='foo')) - - def test_sort(self): - for ind in self.indices.values(): - self.assertRaises(TypeError, ind.sort) - - def test_order(self): - for ind in self.indices.values(): - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ind.order() - - def test_mutability(self): - for ind in self.indices.values(): - if not len(ind): - continue - self.assertRaises(TypeError, ind.__setitem__, 0, ind[0]) - - def test_view(self): - for ind in self.indices.values(): - i_view = ind.view() - self.assertEqual(i_view.name, ind.name) - - def test_compat(self): - for ind in self.indices.values(): - self.assertEqual(ind.tolist(), list(ind)) - - def test_argsort(self): - for k, ind in self.indices.items(): - - # sep teststed - if k in ['catIndex']: - continue - - result = ind.argsort() - expected = np.array(ind).argsort() - tm.assert_numpy_array_equal(result, expected) - - def test_pickle(self): - for ind in self.indices.values(): - self.verify_pickle(ind) - ind.name = 'foo' - self.verify_pickle(ind) - - def test_take(self): - indexer = [4, 3, 0, 2] - for k, ind in self.indices.items(): - - # separate - if k in ['boolIndex', 'tuples', 'empty']: - continue - - result = ind.take(indexer) - expected = ind[indexer] - self.assertTrue(result.equals(expected)) - - if not isinstance(ind, - (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - # GH 10791 - with tm.assertRaises(AttributeError): - ind.freq - - def test_setops_errorcases(self): - for name, idx in compat.iteritems(self.indices): - # # non-iterable input - cases = [0.5, 'xxx'] - methods = [idx.intersection, idx.union, idx.difference, - idx.sym_diff] - - for method in methods: - for case in cases: - assertRaisesRegexp(TypeError, - "Input must be Index or array-like", - method, 
case) - - def test_intersection_base(self): - for name, idx in compat.iteritems(self.indices): - first = idx[:5] - second = idx[:3] - intersect = first.intersection(second) - - if isinstance(idx, CategoricalIndex): - pass - else: - self.assertTrue(tm.equalContents(intersect, second)) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): - result = first.intersection(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): - result = first.intersection([1, 2, 3]) - - def test_union_base(self): - for name, idx in compat.iteritems(self.indices): - first = idx[3:] - second = idx[:5] - everything = idx - union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): - result = first.union(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): - result = first.union([1, 2, 3]) - - def test_difference_base(self): - for name, idx in compat.iteritems(self.indices): - first = idx[2:] - second = idx[:4] - answer = idx[4:] - result = first.difference(second) - - if isinstance(idx, CategoricalIndex): - pass - else: - self.assertTrue(tm.equalContents(result, answer)) 
- - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): - result = first.difference(case) - elif isinstance(idx, CategoricalIndex): - pass - elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): - self.assertEqual(result.__class__, answer.__class__) - tm.assert_numpy_array_equal(result.asi8, answer.asi8) - else: - result = first.difference(case) - self.assertTrue(tm.equalContents(result, answer)) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): - result = first.difference([1, 2, 3]) - - def test_symmetric_diff(self): - for name, idx in compat.iteritems(self.indices): - first = idx[1:] - second = idx[:-1] - if isinstance(idx, CategoricalIndex): - pass - else: - answer = idx[[0, -1]] - result = first.sym_diff(second) - self.assertTrue(tm.equalContents(result, answer)) - - # GH 10149 - cases = [klass(second.values) - for klass in [np.array, Series, list]] - for case in cases: - if isinstance(idx, PeriodIndex): - msg = "can only call with other PeriodIndex-ed objects" - with tm.assertRaisesRegexp(ValueError, msg): - result = first.sym_diff(case) - elif isinstance(idx, CategoricalIndex): - pass - else: - result = first.sym_diff(case) - self.assertTrue(tm.equalContents(result, answer)) - - if isinstance(idx, MultiIndex): - msg = "other must be a MultiIndex or a list of tuples" - with tm.assertRaisesRegexp(TypeError, msg): - result = first.sym_diff([1, 2, 3]) - - def test_insert_base(self): - - for name, idx in compat.iteritems(self.indices): - result = idx[1:4] - - if not len(idx): - continue - - # test 0th element - self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) - - def test_delete_base(self): - - for name, idx in compat.iteritems(self.indices): - - if not len(idx): - continue - 
- if isinstance(idx, RangeIndex): - # tested in class - continue - - expected = idx[1:] - result = idx.delete(0) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - - expected = idx[:-1] - result = idx.delete(-1) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - - with tm.assertRaises((IndexError, ValueError)): - # either depending on numpy version - result = idx.delete(len(idx)) - - def test_equals_op(self): - # GH9947, GH10637 - index_a = self.create_index() - if isinstance(index_a, PeriodIndex): - return - - n = len(index_a) - index_b = index_a[0:-1] - index_c = index_a[0:-1].append(index_a[-2:-1]) - index_d = index_a[0:1] - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - index_a == index_b - expected1 = np.array([True] * n) - expected2 = np.array([True] * (n - 1) + [False]) - tm.assert_numpy_array_equal(index_a == index_a, expected1) - tm.assert_numpy_array_equal(index_a == index_c, expected2) - - # test comparisons with numpy arrays - array_a = np.array(index_a) - array_b = np.array(index_a[0:-1]) - array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) - array_d = np.array(index_a[0:1]) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - index_a == array_b - tm.assert_numpy_array_equal(index_a == array_a, expected1) - tm.assert_numpy_array_equal(index_a == array_c, expected2) - - # test comparisons with Series - series_a = Series(array_a) - series_b = Series(array_b) - series_c = Series(array_c) - series_d = Series(array_d) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - index_a == series_b - tm.assert_numpy_array_equal(index_a == series_a, expected1) - tm.assert_numpy_array_equal(index_a == series_c, expected2) - - # cases where length is 1 for one of them - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - index_a == index_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - index_a == series_d - 
with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - index_a == array_d - with tm.assertRaisesRegexp(ValueError, "Series lengths must match"): - series_a == series_d - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - series_a == array_d - - # comparing with a scalar should broadcast; note that we are excluding - # MultiIndex because in this case each item in the index is a tuple of - # length 2, and therefore is considered an array of length 2 in the - # comparison instead of a scalar - if not isinstance(index_a, MultiIndex): - expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) - # assuming the 2nd to last item is unique in the data - item = index_a[-2] - tm.assert_numpy_array_equal(index_a == item, expected3) - tm.assert_numpy_array_equal(series_a == item, expected3) - - def test_numpy_ufuncs(self): - # test ufuncs of numpy 1.9.2. see: - # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - - for name, idx in compat.iteritems(self.indices): - for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, - np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, - np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, - np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, - np.rad2deg]: - if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - # PeriodIndex behavior should be changed in future version - with tm.assertRaises(Exception): - func(idx) - elif isinstance(idx, (Float64Index, Int64Index)): - # coerces to float (e.g. 
np.sin) - result = func(idx) - exp = Index(func(idx.values), name=idx.name) - self.assert_index_equal(result, exp) - self.assertIsInstance(result, pd.Float64Index) - else: - # raise AttributeError or TypeError - if len(idx) == 0: - continue - else: - with tm.assertRaises(Exception): - func(idx) - - for func in [np.isfinite, np.isinf, np.isnan, np.signbit]: - if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): - # raise TypeError or ValueError (PeriodIndex) - with tm.assertRaises(Exception): - func(idx) - elif isinstance(idx, (Float64Index, Int64Index)): - # results in bool array - result = func(idx) - exp = func(idx.values) - self.assertIsInstance(result, np.ndarray) - tm.assertNotIsInstance(result, Index) - else: - if len(idx) == 0: - continue - else: - with tm.assertRaises(Exception): - func(idx) - - def test_hasnans_isnans(self): - # GH 11343, added tests for hasnans / isnans - for name, index in self.indices.items(): - if isinstance(index, MultiIndex): - pass - else: - idx = index.copy() - - # cases in indices doesn't include NaN - expected = np.array([False] * len(idx), dtype=bool) - self.assert_numpy_array_equal(idx._isnan, expected) - self.assertFalse(idx.hasnans) - - idx = index.copy() - values = idx.values - - if len(index) == 0: - continue - elif isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT - elif isinstance(index, Int64Index): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) - else: - idx = index.__class__(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - self.assert_numpy_array_equal(idx._isnan, expected) - self.assertTrue(idx.hasnans) - - def test_fillna(self): - # GH 11343 - for name, index in self.indices.items(): - if len(index) == 0: - pass - elif isinstance(index, MultiIndex): - idx = index.copy() - msg = "isnull is not defined for MultiIndex" - with 
self.assertRaisesRegexp(NotImplementedError, msg): - idx.fillna(idx[0]) - else: - idx = index.copy() - result = idx.fillna(idx[0]) - self.assert_index_equal(result, idx) - self.assertFalse(result is idx) - - msg = "'value' must be a scalar, passed: " - with self.assertRaisesRegexp(TypeError, msg): - idx.fillna([idx[0]]) - - idx = index.copy() - values = idx.values - - if isinstance(index, pd.tseries.base.DatetimeIndexOpsMixin): - values[1] = pd.tslib.iNaT - elif isinstance(index, Int64Index): - continue - else: - values[1] = np.nan - - if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) - else: - idx = index.__class__(values) - - expected = np.array([False] * len(idx), dtype=bool) - expected[1] = True - self.assert_numpy_array_equal(idx._isnan, expected) - self.assertTrue(idx.hasnans) - - -class TestIndex(Base, tm.TestCase): - _holder = Index - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), - strIndex=tm.makeStringIndex(100), - dateIndex=tm.makeDateIndex(100), - periodIndex=tm.makePeriodIndex(100), - tdIndex=tm.makeTimedeltaIndex(100), - intIndex=tm.makeIntIndex(100), - rangeIndex=tm.makeIntIndex(100), - floatIndex=tm.makeFloatIndex(100), - boolIndex=Index([True, False]), - catIndex=tm.makeCategoricalIndex(100), - empty=Index([]), - tuples=MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3]))) - self.setup_indices() - - def create_index(self): - return Index(list('abcde')) - - def test_new_axis(self): - new_index = self.dateIndex[None, :] - self.assertEqual(new_index.ndim, 2) - tm.assertIsInstance(new_index, np.ndarray) - - def test_copy_and_deepcopy(self): - super(TestIndex, self).test_copy_and_deepcopy() - - new_copy2 = self.intIndex.copy(dtype=int) - self.assertEqual(new_copy2.dtype.kind, 'i') - - def test_constructor(self): - # regular instance creation - tm.assert_contains_all(self.strIndex, self.strIndex) - tm.assert_contains_all(self.dateIndex, 
self.dateIndex) - - # casting - arr = np.array(self.strIndex) - index = Index(arr) - tm.assert_contains_all(arr, index) - tm.assert_numpy_array_equal(self.strIndex, index) - - # copy - arr = np.array(self.strIndex) - index = Index(arr, copy=True, name='name') - tm.assertIsInstance(index, Index) - self.assertEqual(index.name, 'name') - tm.assert_numpy_array_equal(arr, index) - arr[0] = "SOMEBIGLONGSTRING" - self.assertNotEqual(index[0], "SOMEBIGLONGSTRING") - - # what to do here? - # arr = np.array(5.) - # self.assertRaises(Exception, arr.view, Index) - - def test_constructor_corner(self): - # corner case - self.assertRaises(TypeError, Index, 0) - - def test_construction_list_mixed_tuples(self): - # 10697 - # if we are constructing from a mixed list of tuples, make sure that we - # are independent of the sorting order - idx1 = Index([('A', 1), 'B']) - self.assertIsInstance(idx1, Index) and self.assertNotInstance( - idx1, MultiIndex) - idx2 = Index(['B', ('A', 1)]) - self.assertIsInstance(idx2, Index) and self.assertNotInstance( - idx2, MultiIndex) - - def test_constructor_from_series(self): - - expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'), - Timestamp('20130101')]) - s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp( - '20130101')]) - result = Index(s) - self.assertTrue(result.equals(expected)) - result = DatetimeIndex(s) - self.assertTrue(result.equals(expected)) - - # GH 6273 - # create from a series, passing a freq - s = Series(pd.to_datetime(['1-1-1990', '2-1-1990', '3-1-1990', - '4-1-1990', '5-1-1990'])) - result = DatetimeIndex(s, freq='MS') - expected = DatetimeIndex( - ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990' - ], freq='MS') - self.assertTrue(result.equals(expected)) - - df = pd.DataFrame(np.random.rand(5, 3)) - df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', - '5-1-1990'] - result = DatetimeIndex(df['date'], freq='MS') - self.assertTrue(result.equals(expected)) - 
self.assertEqual(df['date'].dtype, object) - - exp = pd.Series( - ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990' - ], name='date') - self.assert_series_equal(df['date'], exp) - - # GH 6274 - # infer freq of same - result = pd.infer_freq(df['date']) - self.assertEqual(result, 'MS') - - def test_constructor_ndarray_like(self): - # GH 5460#issuecomment-44474502 - # it should be possible to convert any object that satisfies the numpy - # ndarray interface directly into an Index - class ArrayLike(object): - - def __init__(self, array): - self.array = array - - def __array__(self, dtype=None): - return self.array - - for array in [np.arange(5), np.array(['a', 'b', 'c']), - date_range('2000-01-01', periods=3).values]: - expected = pd.Index(array) - result = pd.Index(ArrayLike(array)) - self.assertTrue(result.equals(expected)) - - def test_index_ctor_infer_periodindex(self): - xp = period_range('2012-1-1', freq='M', periods=3) - rs = Index(xp) - tm.assert_numpy_array_equal(rs, xp) - tm.assertIsInstance(rs, PeriodIndex) - - def test_constructor_simple_new(self): - idx = Index([1, 2, 3, 4, 5], name='int') - result = idx._simple_new(idx, 'int') - self.assertTrue(result.equals(idx)) - - idx = Index([1.1, np.nan, 2.2, 3.0], name='float') - result = idx._simple_new(idx, 'float') - self.assertTrue(result.equals(idx)) - - idx = Index(['A', 'B', 'C', np.nan], name='obj') - result = idx._simple_new(idx, 'obj') - self.assertTrue(result.equals(idx)) - - def test_constructor_dtypes(self): - - for idx in [Index(np.array([1, 2, 3], dtype=int)), Index( - np.array( - [1, 2, 3], dtype=int), dtype=int), Index( - np.array( - [1., 2., 3.], dtype=float), dtype=int), Index( - [1, 2, 3], dtype=int), Index( - [1., 2., 3.], dtype=int)]: - self.assertIsInstance(idx, Int64Index) - - for idx in [Index(np.array([1., 2., 3.], dtype=float)), Index( - np.array( - [1, 2, 3], dtype=int), dtype=float), Index( - np.array( - [1., 2., 3.], dtype=float), dtype=float), Index( - [1, 2, 3], 
dtype=float), Index( - [1., 2., 3.], dtype=float)]: - self.assertIsInstance(idx, Float64Index) - - for idx in [Index(np.array( - [True, False, True], dtype=bool)), Index([True, False, True]), - Index( - np.array( - [True, False, True], dtype=bool), dtype=bool), - Index( - [True, False, True], dtype=bool)]: - self.assertIsInstance(idx, Index) - self.assertEqual(idx.dtype, object) - - for idx in [Index( - np.array([1, 2, 3], dtype=int), dtype='category'), Index( - [1, 2, 3], dtype='category'), Index( - np.array([np.datetime64('2011-01-01'), np.datetime64( - '2011-01-02')]), dtype='category'), Index( - [datetime(2011, 1, 1), datetime(2011, 1, 2) - ], dtype='category')]: - self.assertIsInstance(idx, CategoricalIndex) - - for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64( - '2011-01-02')])), - Index([datetime(2011, 1, 1), datetime(2011, 1, 2)])]: - self.assertIsInstance(idx, DatetimeIndex) - - for idx in [Index( - np.array([np.datetime64('2011-01-01'), np.datetime64( - '2011-01-02')]), dtype=object), Index( - [datetime(2011, 1, 1), datetime(2011, 1, 2) - ], dtype=object)]: - self.assertNotIsInstance(idx, DatetimeIndex) - self.assertIsInstance(idx, Index) - self.assertEqual(idx.dtype, object) - - for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64( - 1, 'D')])), Index([timedelta(1), timedelta(1)])]: - self.assertIsInstance(idx, TimedeltaIndex) - - for idx in [Index( - np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), - dtype=object), Index( - [timedelta(1), timedelta(1)], dtype=object)]: - self.assertNotIsInstance(idx, TimedeltaIndex) - self.assertIsInstance(idx, Index) - self.assertEqual(idx.dtype, object) - - def test_view_with_args(self): - - restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', - 'empty'] - - for i in restricted: - ind = self.indices[i] - - # with arguments - self.assertRaises(TypeError, lambda: ind.view('i8')) - - # these are ok - for i in list(set(self.indices.keys()) - set(restricted)): - 
ind = self.indices[i] - - # with arguments - ind.view('i8') - - def test_legacy_pickle_identity(self): - - # GH 8431 - pth = tm.get_data_path() - s1 = pd.read_pickle(os.path.join(pth, 's1-0.12.0.pickle')) - s2 = pd.read_pickle(os.path.join(pth, 's2-0.12.0.pickle')) - self.assertFalse(s1.index.identical(s2.index)) - self.assertFalse(s1.index.equals(s2.index)) - - def test_astype(self): - casted = self.intIndex.astype('i8') - - # it works! - casted.get_loc(5) - - # pass on name - self.intIndex.name = 'foobar' - casted = self.intIndex.astype('i8') - self.assertEqual(casted.name, 'foobar') - - def test_equals(self): - # same - self.assertTrue(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'c']))) - - # different length - self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b']))) - - # same length, different values - self.assertFalse(Index(['a', 'b', 'c']).equals(Index(['a', 'b', 'd']))) - - # Must also be an Index - self.assertFalse(Index(['a', 'b', 'c']).equals(['a', 'b', 'c'])) - - def test_insert(self): - - # GH 7256 - # validate neg/pos inserts - result = Index(['b', 'c', 'd']) - - # test 0th element - self.assertTrue(Index(['a', 'b', 'c', 'd']).equals(result.insert(0, - 'a'))) - - # test Nth element that follows Python list behavior - self.assertTrue(Index(['b', 'c', 'e', 'd']).equals(result.insert(-1, - 'e'))) - - # test loc +/- neq (0, -1) - self.assertTrue(result.insert(1, 'z').equals(result.insert(-2, 'z'))) - - # test empty - null_index = Index([]) - self.assertTrue(Index(['a']).equals(null_index.insert(0, 'a'))) - - def test_delete(self): - idx = Index(['a', 'b', 'c', 'd'], name='idx') - - expected = Index(['b', 'c', 'd'], name='idx') - result = idx.delete(0) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - - expected = Index(['a', 'b', 'c'], name='idx') - result = idx.delete(-1) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - - with tm.assertRaises((IndexError, 
ValueError)): - # either depeidnig on numpy version - result = idx.delete(5) - - def test_identical(self): - - # index - i1 = Index(['a', 'b', 'c']) - i2 = Index(['a', 'b', 'c']) - - self.assertTrue(i1.identical(i2)) - - i1 = i1.rename('foo') - self.assertTrue(i1.equals(i2)) - self.assertFalse(i1.identical(i2)) - - i2 = i2.rename('foo') - self.assertTrue(i1.identical(i2)) - - i3 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')]) - i4 = Index([('a', 'a'), ('a', 'b'), ('b', 'a')], tupleize_cols=False) - self.assertFalse(i3.identical(i4)) - - def test_is_(self): - ind = Index(range(10)) - self.assertTrue(ind.is_(ind)) - self.assertTrue(ind.is_(ind.view().view().view().view())) - self.assertFalse(ind.is_(Index(range(10)))) - self.assertFalse(ind.is_(ind.copy())) - self.assertFalse(ind.is_(ind.copy(deep=False))) - self.assertFalse(ind.is_(ind[:])) - self.assertFalse(ind.is_(ind.view(np.ndarray).view(Index))) - self.assertFalse(ind.is_(np.array(range(10)))) - - # quasi-implementation dependent - self.assertTrue(ind.is_(ind.view())) - ind2 = ind.view() - ind2.name = 'bob' - self.assertTrue(ind.is_(ind2)) - self.assertTrue(ind2.is_(ind)) - # doesn't matter if Indices are *actually* views of underlying data, - self.assertFalse(ind.is_(Index(ind.values))) - arr = np.array(range(1, 11)) - ind1 = Index(arr, copy=False) - ind2 = Index(arr, copy=False) - self.assertFalse(ind1.is_(ind2)) - - def test_asof(self): - d = self.dateIndex[0] - self.assertEqual(self.dateIndex.asof(d), d) - self.assertTrue(np.isnan(self.dateIndex.asof(d - timedelta(1)))) - - d = self.dateIndex[-1] - self.assertEqual(self.dateIndex.asof(d + timedelta(1)), d) - - d = self.dateIndex[0].to_datetime() - tm.assertIsInstance(self.dateIndex.asof(d), Timestamp) - - def test_asof_datetime_partial(self): - idx = pd.date_range('2010-01-01', periods=2, freq='m') - expected = Timestamp('2010-02-28') - result = idx.asof('2010-02') - self.assertEqual(result, expected) - self.assertFalse(isinstance(result, Index)) - - def 
test_nanosecond_index_access(self): - s = Series([Timestamp('20130101')]).values.view('i8')[0] - r = DatetimeIndex([s + 50 + i for i in range(100)]) - x = Series(np.random.randn(100), index=r) - - first_value = x.asof(x.index[0]) - - # this does not yet work, as parsing strings is done via dateutil - # self.assertEqual(first_value, - # x['2013-01-01 00:00:00.000000050+0000']) - - self.assertEqual( - first_value, - x[Timestamp(np.datetime64('2013-01-01 00:00:00.000000050+0000', - 'ns'))]) - - def test_comparators(self): - index = self.dateIndex - element = index[len(index) // 2] - element = _to_m8(element) - - arr = np.array(index) - - def _check(op): - arr_result = op(arr, element) - index_result = op(index, element) - - self.assertIsInstance(index_result, np.ndarray) - tm.assert_numpy_array_equal(arr_result, index_result) - - _check(operator.eq) - _check(operator.ne) - _check(operator.gt) - _check(operator.lt) - _check(operator.ge) - _check(operator.le) - - def test_booleanindex(self): - boolIdx = np.repeat(True, len(self.strIndex)).astype(bool) - boolIdx[5:30:2] = False - - subIndex = self.strIndex[boolIdx] - - for i, val in enumerate(subIndex): - self.assertEqual(subIndex.get_loc(val), i) - - subIndex = self.strIndex[list(boolIdx)] - for i, val in enumerate(subIndex): - self.assertEqual(subIndex.get_loc(val), i) - - def test_fancy(self): - sl = self.strIndex[[1, 2, 3]] - for i in sl: - self.assertEqual(i, sl[sl.get_loc(i)]) - - def test_empty_fancy(self): - empty_farr = np.array([], dtype=np.float_) - empty_iarr = np.array([], dtype=np.int_) - empty_barr = np.array([], dtype=np.bool_) - - # pd.DatetimeIndex is excluded, because it overrides getitem and should - # be tested separately. 
- for idx in [self.strIndex, self.intIndex, self.floatIndex]: - empty_idx = idx.__class__([]) - - self.assertTrue(idx[[]].identical(empty_idx)) - self.assertTrue(idx[empty_iarr].identical(empty_idx)) - self.assertTrue(idx[empty_barr].identical(empty_idx)) - - # np.ndarray only accepts ndarray of int & bool dtypes, so should - # Index. - self.assertRaises(IndexError, idx.__getitem__, empty_farr) - - def test_getitem(self): - arr = np.array(self.dateIndex) - exp = self.dateIndex[5] - exp = _to_m8(exp) - - self.assertEqual(exp, arr[5]) - - def test_intersection(self): - first = self.strIndex[:20] - second = self.strIndex[:10] - intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) - - # Corner cases - inter = first.intersection(first) - self.assertIs(inter, first) - - idx1 = Index([1, 2, 3, 4, 5], name='idx') - # if target has the same name, it is preserved - idx2 = Index([3, 4, 5, 6, 7], name='idx') - expected2 = Index([3, 4, 5], name='idx') - result2 = idx1.intersection(idx2) - self.assertTrue(result2.equals(expected2)) - self.assertEqual(result2.name, expected2.name) - - # if target name is different, it will be reset - idx3 = Index([3, 4, 5, 6, 7], name='other') - expected3 = Index([3, 4, 5], name=None) - result3 = idx1.intersection(idx3) - self.assertTrue(result3.equals(expected3)) - self.assertEqual(result3.name, expected3.name) - - # non monotonic - idx1 = Index([5, 3, 2, 4, 1], name='idx') - idx2 = Index([4, 7, 6, 5, 3], name='idx') - result2 = idx1.intersection(idx2) - self.assertTrue(tm.equalContents(result2, expected2)) - self.assertEqual(result2.name, expected2.name) - - idx3 = Index([4, 7, 6, 5, 3], name='other') - result3 = idx1.intersection(idx3) - self.assertTrue(tm.equalContents(result3, expected3)) - self.assertEqual(result3.name, expected3.name) - - # non-monotonic non-unique - idx1 = Index(['A', 'B', 'A', 'C']) - idx2 = Index(['B', 'D']) - expected = Index(['B'], dtype='object') - result = 
idx1.intersection(idx2) - self.assertTrue(result.equals(expected)) - - def test_union(self): - first = self.strIndex[5:20] - second = self.strIndex[:10] - everything = self.strIndex[:20] - union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) - - # Corner cases - union = first.union(first) - self.assertIs(union, first) - - union = first.union([]) - self.assertIs(union, first) - - union = Index([]).union(first) - self.assertIs(union, first) - - # preserve names - first.name = 'A' - second.name = 'A' - union = first.union(second) - self.assertEqual(union.name, 'A') - - second.name = 'B' - union = first.union(second) - self.assertIsNone(union.name) - - def test_add(self): - - # - API change GH 8226 - with tm.assert_produces_warning(): - self.strIndex + self.strIndex - with tm.assert_produces_warning(): - self.strIndex + self.strIndex.tolist() - with tm.assert_produces_warning(): - self.strIndex.tolist() + self.strIndex - - with tm.assert_produces_warning(RuntimeWarning): - firstCat = self.strIndex.union(self.dateIndex) - secondCat = self.strIndex.union(self.strIndex) - - if self.dateIndex.dtype == np.object_: - appended = np.append(self.strIndex, self.dateIndex) - else: - appended = np.append(self.strIndex, self.dateIndex.astype('O')) - - self.assertTrue(tm.equalContents(firstCat, appended)) - self.assertTrue(tm.equalContents(secondCat, self.strIndex)) - tm.assert_contains_all(self.strIndex, firstCat) - tm.assert_contains_all(self.strIndex, secondCat) - tm.assert_contains_all(self.dateIndex, firstCat) - - # test add and radd - idx = Index(list('abc')) - expected = Index(['a1', 'b1', 'c1']) - self.assert_index_equal(idx + '1', expected) - expected = Index(['1a', '1b', '1c']) - self.assert_index_equal('1' + idx, expected) - - def 
test_append_multiple(self): - index = Index(['a', 'b', 'c', 'd', 'e', 'f']) - - foos = [index[:2], index[2:4], index[4:]] - result = foos[0].append(foos[1:]) - self.assertTrue(result.equals(index)) - - # empty - result = index.append([]) - self.assertTrue(result.equals(index)) - - def test_append_empty_preserve_name(self): - left = Index([], name='foo') - right = Index([1, 2, 3], name='foo') - - result = left.append(right) - self.assertEqual(result.name, 'foo') - - left = Index([], name='foo') - right = Index([1, 2, 3], name='bar') - - result = left.append(right) - self.assertIsNone(result.name) - - def test_add_string(self): - # from bug report - index = Index(['a', 'b', 'c']) - index2 = index + 'foo' - - self.assertNotIn('a', index2) - self.assertIn('afoo', index2) - - def test_iadd_string(self): - index = pd.Index(['a', 'b', 'c']) - # doesn't fail test unless there is a check before `+=` - self.assertIn('a', index) - - index += '_x' - self.assertIn('a_x', index) - - def test_difference(self): - - first = self.strIndex[5:20] - second = self.strIndex[:10] - answer = self.strIndex[10:20] - first.name = 'name' - # different names - result = first.difference(second) - - self.assertTrue(tm.equalContents(result, answer)) - self.assertEqual(result.name, None) - - # same names - second.name = 'name' - result = first.difference(second) - self.assertEqual(result.name, 'name') - - # with empty - result = first.difference([]) - self.assertTrue(tm.equalContents(result, first)) - self.assertEqual(result.name, first.name) - - # with everythin - result = first.difference(first) - self.assertEqual(len(result), 0) - self.assertEqual(result.name, first.name) - - def test_symmetric_diff(self): - # smoke - idx1 = Index([1, 2, 3, 4], name='idx1') - idx2 = Index([2, 3, 4, 5]) - result = idx1.sym_diff(idx2) - expected = Index([1, 5]) - self.assertTrue(tm.equalContents(result, expected)) - self.assertIsNone(result.name) - - # __xor__ syntax - expected = idx1 ^ idx2 - 
self.assertTrue(tm.equalContents(result, expected)) - self.assertIsNone(result.name) - - # multiIndex - idx1 = MultiIndex.from_tuples(self.tuples) - idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) - result = idx1.sym_diff(idx2) - expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) - self.assertTrue(tm.equalContents(result, expected)) - - # nans: - # GH #6444, sorting of nans. Make sure the number of nans is right - # and the correct non-nan values are there. punt on sorting. - idx1 = Index([1, 2, 3, np.nan]) - idx2 = Index([0, 1, np.nan]) - result = idx1.sym_diff(idx2) - # expected = Index([0.0, np.nan, 2.0, 3.0, np.nan]) - - nans = pd.isnull(result) - self.assertEqual(nans.sum(), 1) - self.assertEqual((~nans).sum(), 3) - [self.assertIn(x, result) for x in [0.0, 2.0, 3.0]] - - # other not an Index: - idx1 = Index([1, 2, 3, 4], name='idx1') - idx2 = np.array([2, 3, 4, 5]) - expected = Index([1, 5]) - result = idx1.sym_diff(idx2) - self.assertTrue(tm.equalContents(result, expected)) - self.assertEqual(result.name, 'idx1') - - result = idx1.sym_diff(idx2, result_name='new_name') - self.assertTrue(tm.equalContents(result, expected)) - self.assertEqual(result.name, 'new_name') - - def test_is_numeric(self): - self.assertFalse(self.dateIndex.is_numeric()) - self.assertFalse(self.strIndex.is_numeric()) - self.assertTrue(self.intIndex.is_numeric()) - self.assertTrue(self.floatIndex.is_numeric()) - self.assertFalse(self.catIndex.is_numeric()) - - def test_is_object(self): - self.assertTrue(self.strIndex.is_object()) - self.assertTrue(self.boolIndex.is_object()) - self.assertFalse(self.catIndex.is_object()) - self.assertFalse(self.intIndex.is_object()) - self.assertFalse(self.dateIndex.is_object()) - self.assertFalse(self.floatIndex.is_object()) - - def test_is_all_dates(self): - self.assertTrue(self.dateIndex.is_all_dates) - self.assertFalse(self.strIndex.is_all_dates) - self.assertFalse(self.intIndex.is_all_dates) - - def test_summary(self): - 
self._check_method_works(Index.summary) - # GH3869 - ind = Index(['{other}%s', "~:{range}:0"], name='A') - result = ind.summary() - # shouldn't be formatted accidentally. - self.assertIn('~:{range}:0', result) - self.assertIn('{other}%s', result) - - def test_format(self): - self._check_method_works(Index.format) - - index = Index([datetime.now()]) - - # windows has different precision on datetime.datetime.now (it doesn't - # include us since the default for Timestamp shows these but Index - # formating does not we are skipping - if not is_platform_windows(): - formatted = index.format() - expected = [str(index[0])] - self.assertEqual(formatted, expected) - - # 2845 - index = Index([1, 2.0 + 3.0j, np.nan]) - formatted = index.format() - expected = [str(index[0]), str(index[1]), u('NaN')] - self.assertEqual(formatted, expected) - - # is this really allowed? - index = Index([1, 2.0 + 3.0j, None]) - formatted = index.format() - expected = [str(index[0]), str(index[1]), u('NaN')] - self.assertEqual(formatted, expected) - - self.strIndex[:0].format() - - def test_format_with_name_time_info(self): - # bug I fixed 12/20/2011 - inc = timedelta(hours=4) - dates = Index([dt + inc for dt in self.dateIndex], name='something') - - formatted = dates.format(name=True) - self.assertEqual(formatted[0], 'something') - - def test_format_datetime_with_time(self): - t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) - - result = t.format() - expected = ['2012-02-07 00:00:00', '2012-02-07 23:00:00'] - self.assertEqual(len(result), 2) - self.assertEqual(result, expected) - - def test_format_none(self): - values = ['a', 'b', 'c', None] - - idx = Index(values) - idx.format() - self.assertIsNone(idx[3]) - - def test_logical_compat(self): - idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) - - def _check_method_works(self, method): - method(self.empty) - method(self.dateIndex) - method(self.unicodeIndex) - 
method(self.strIndex) - method(self.intIndex) - method(self.tuples) - method(self.catIndex) - - def test_get_indexer(self): - idx1 = Index([1, 2, 3, 4, 5]) - idx2 = Index([2, 4, 6]) - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, [1, 3, -1]) - - r1 = idx2.get_indexer(idx1, method='pad') - e1 = [-1, 0, 0, 1, 1] - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) - - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx2.get_indexer(idx1, method='backfill') - e1 = [0, 0, 1, 1, 2] - assert_almost_equal(r1, e1) - - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) - - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) - - def test_get_indexer_invalid(self): - # GH10411 - idx = Index(np.arange(10)) - - with tm.assertRaisesRegexp(ValueError, 'tolerance argument'): - idx.get_indexer([1, 0], tolerance=1) - - with tm.assertRaisesRegexp(ValueError, 'limit argument'): - idx.get_indexer([1, 0], limit=1) - - def test_get_indexer_nearest(self): - idx = Index(np.arange(10)) - - all_methods = ['pad', 'backfill', 'nearest'] - for method in all_methods: - actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, [0, 5, 9]) - - actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) - tm.assert_numpy_array_equal(actual, [0, 5, 9]) - - for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], [0, 2, - 9]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, expected) - - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=1) - tm.assert_numpy_array_equal(actual, expected) - - for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], - [0, 2, -1]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, - tolerance=0.2) - tm.assert_numpy_array_equal(actual, expected) - - with 
tm.assertRaisesRegexp(ValueError, 'limit argument'): - idx.get_indexer([1, 0], method='nearest', limit=1) - - def test_get_indexer_nearest_decreasing(self): - idx = Index(np.arange(10))[::-1] - - all_methods = ['pad', 'backfill', 'nearest'] - for method in all_methods: - actual = idx.get_indexer([0, 5, 9], method=method) - tm.assert_numpy_array_equal(actual, [9, 4, 0]) - - for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1], [9, 7, - 0]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) - tm.assert_numpy_array_equal(actual, expected) - - def test_get_indexer_strings(self): - idx = pd.Index(['b', 'c']) - - actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='pad') - expected = [-1, 0, 1, 1] - tm.assert_numpy_array_equal(actual, expected) - - actual = idx.get_indexer(['a', 'b', 'c', 'd'], method='backfill') - expected = [0, 0, 1, -1] - tm.assert_numpy_array_equal(actual, expected) - - with tm.assertRaises(TypeError): - idx.get_indexer(['a', 'b', 'c', 'd'], method='nearest') - - with tm.assertRaises(TypeError): - idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) - - def test_get_loc(self): - idx = pd.Index([0, 1, 2]) - all_methods = [None, 'pad', 'backfill', 'nearest'] - for method in all_methods: - self.assertEqual(idx.get_loc(1, method=method), 1) - if method is not None: - self.assertEqual(idx.get_loc(1, method=method, tolerance=0), 1) - with tm.assertRaises(TypeError): - idx.get_loc([1, 2], method=method) - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method), loc) - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method, tolerance=1), loc) - - for method in ['pad', 'backfill', 'nearest']: - with tm.assertRaises(KeyError): - idx.get_loc(1.1, method, tolerance=0.05) - - with tm.assertRaisesRegexp(ValueError, 'must be numeric'): - idx.get_loc(1.1, 'nearest', tolerance='invalid') - with 
tm.assertRaisesRegexp(ValueError, 'tolerance .* valid if'): - idx.get_loc(1.1, tolerance=1) - - idx = pd.Index(['a', 'c']) - with tm.assertRaises(TypeError): - idx.get_loc('a', method='nearest') - with tm.assertRaises(TypeError): - idx.get_loc('a', method='pad', tolerance='invalid') - - def test_slice_locs(self): - for dtype in [int, float]: - idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) - n = len(idx) - - self.assertEqual(idx.slice_locs(start=2), (2, n)) - self.assertEqual(idx.slice_locs(start=3), (3, n)) - self.assertEqual(idx.slice_locs(3, 8), (3, 6)) - self.assertEqual(idx.slice_locs(5, 10), (3, n)) - self.assertEqual(idx.slice_locs(end=8), (0, 6)) - self.assertEqual(idx.slice_locs(end=9), (0, 7)) - - # reversed - idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8, 2), (2, 6)) - self.assertEqual(idx2.slice_locs(7, 3), (2, 5)) - - # float slicing - idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=float)) - n = len(idx) - self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) - self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) - idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) - self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) - - # int slicing with floats - idx = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=int)) - self.assertEqual(idx.slice_locs(5.0, 10.0), (3, n)) - self.assertEqual(idx.slice_locs(4.5, 10.5), (3, 8)) - idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(8.5, 1.5), (2, 6)) - self.assertEqual(idx2.slice_locs(10.5, -1), (0, n)) - - def test_slice_locs_dup(self): - idx = Index(['a', 'a', 'b', 'c', 'd', 'd']) - self.assertEqual(idx.slice_locs('a', 'd'), (0, 6)) - self.assertEqual(idx.slice_locs(end='d'), (0, 6)) - self.assertEqual(idx.slice_locs('a', 'c'), (0, 4)) - self.assertEqual(idx.slice_locs('b', 'd'), (2, 6)) - - idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs('d', 'a'), (0, 6)) - self.assertEqual(idx2.slice_locs(end='a'), (0, 6)) - self.assertEqual(idx2.slice_locs('d', 'b'), (0, 4)) - 
self.assertEqual(idx2.slice_locs('c', 'a'), (2, 6)) - - for dtype in [int, float]: - idx = Index(np.array([10, 12, 12, 14], dtype=dtype)) - self.assertEqual(idx.slice_locs(12, 12), (1, 3)) - self.assertEqual(idx.slice_locs(11, 13), (1, 3)) - - idx2 = idx[::-1] - self.assertEqual(idx2.slice_locs(12, 12), (1, 3)) - self.assertEqual(idx2.slice_locs(13, 11), (1, 3)) - - def test_slice_locs_na(self): - idx = Index([np.nan, 1, 2]) - self.assertRaises(KeyError, idx.slice_locs, start=1.5) - self.assertRaises(KeyError, idx.slice_locs, end=1.5) - self.assertEqual(idx.slice_locs(1), (1, 3)) - self.assertEqual(idx.slice_locs(np.nan), (0, 3)) - - idx = Index([0, np.nan, np.nan, 1, 2]) - self.assertEqual(idx.slice_locs(np.nan), (1, 5)) - - def test_slice_locs_negative_step(self): - idx = Index(list('bcdxy')) - - SLC = pd.IndexSlice - - def check_slice(in_slice, expected): - s_start, s_stop = idx.slice_locs(in_slice.start, in_slice.stop, - in_slice.step) - result = idx[s_start:s_stop:in_slice.step] - expected = pd.Index(list(expected)) - self.assertTrue(result.equals(expected)) - - for in_slice, expected in [ - (SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''), - (SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'), - (SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'), - (SLC['y'::-4], 'yb'), - # absent labels - (SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'), - (SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'), - (SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'), - (SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''), - (SLC['m':'m':-1], '') - ]: - check_slice(in_slice, expected) - - def test_drop(self): - n = len(self.strIndex) - - drop = self.strIndex[lrange(5, 10)] - dropped = self.strIndex.drop(drop) - expected = self.strIndex[lrange(5) + lrange(10, n)] - self.assertTrue(dropped.equals(expected)) - - self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) - self.assertRaises(ValueError, self.strIndex.drop, ['1', 'bar']) - - # errors='ignore' - mixed = drop.tolist() + ['foo'] - dropped = 
self.strIndex.drop(mixed, errors='ignore') - expected = self.strIndex[lrange(5) + lrange(10, n)] - self.assert_index_equal(dropped, expected) - - dropped = self.strIndex.drop(['foo', 'bar'], errors='ignore') - expected = self.strIndex[lrange(n)] - self.assert_index_equal(dropped, expected) - - dropped = self.strIndex.drop(self.strIndex[0]) - expected = self.strIndex[1:] - self.assert_index_equal(dropped, expected) - - ser = Index([1, 2, 3]) - dropped = ser.drop(1) - expected = Index([2, 3]) - self.assert_index_equal(dropped, expected) - - # errors='ignore' - self.assertRaises(ValueError, ser.drop, [3, 4]) - - dropped = ser.drop(4, errors='ignore') - expected = Index([1, 2, 3]) - self.assert_index_equal(dropped, expected) - - dropped = ser.drop([3, 4, 5], errors='ignore') - expected = Index([1, 2]) - self.assert_index_equal(dropped, expected) - - def test_tuple_union_bug(self): - import pandas - import numpy as np - - aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], - dtype=[('num', int), ('let', 'a1')]) - aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), - (2, 'B'), (1, 'C'), (2, 'C')], - dtype=[('num', int), ('let', 'a1')]) - - idx1 = pandas.Index(aidx1) - idx2 = pandas.Index(aidx2) - - # intersection broken? 
- int_idx = idx1.intersection(idx2) - # needs to be 1d like idx1 and idx2 - expected = idx1[:4] # pandas.Index(sorted(set(idx1) & set(idx2))) - self.assertEqual(int_idx.ndim, 1) - self.assertTrue(int_idx.equals(expected)) - - # union broken - union_idx = idx1.union(idx2) - expected = idx2 - self.assertEqual(union_idx.ndim, 1) - self.assertTrue(union_idx.equals(expected)) - - def test_is_monotonic_incomparable(self): - index = Index([5, datetime.now(), 7]) - self.assertFalse(index.is_monotonic) - self.assertFalse(index.is_monotonic_decreasing) - - def test_get_set_value(self): - values = np.random.randn(100) - date = self.dateIndex[67] - - assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) - - self.dateIndex.set_value(values, date, 10) - self.assertEqual(values[67], 10) - - def test_isin(self): - values = ['foo', 'bar', 'quux'] - - idx = Index(['qux', 'baz', 'foo', 'bar']) - result = idx.isin(values) - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - # empty, return dtype bool - idx = Index([]) - result = idx.isin(values) - self.assertEqual(len(result), 0) - self.assertEqual(result.dtype, np.bool_) - - def test_isin_nan(self): - tm.assert_numpy_array_equal( - Index(['a', np.nan]).isin([np.nan]), [False, True]) - tm.assert_numpy_array_equal( - Index(['a', pd.NaT]).isin([pd.NaT]), [False, True]) - tm.assert_numpy_array_equal( - Index(['a', np.nan]).isin([float('nan')]), [False, False]) - tm.assert_numpy_array_equal( - Index(['a', np.nan]).isin([pd.NaT]), [False, False]) - # Float64Index overrides isin, so must be checked separately - tm.assert_numpy_array_equal( - Float64Index([1.0, np.nan]).isin([np.nan]), [False, True]) - tm.assert_numpy_array_equal( - Float64Index([1.0, np.nan]).isin([float('nan')]), [False, True]) - tm.assert_numpy_array_equal( - Float64Index([1.0, np.nan]).isin([pd.NaT]), [False, True]) - - def test_isin_level_kwarg(self): - def check_idx(idx): - values = idx.tolist()[-2:] + 
['nonexisting'] - - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(expected, idx.isin(values, level=0)) - tm.assert_numpy_array_equal(expected, idx.isin(values, level=-1)) - - self.assertRaises(IndexError, idx.isin, values, level=1) - self.assertRaises(IndexError, idx.isin, values, level=10) - self.assertRaises(IndexError, idx.isin, values, level=-2) - - self.assertRaises(KeyError, idx.isin, values, level=1.0) - self.assertRaises(KeyError, idx.isin, values, level='foobar') - - idx.name = 'foobar' - tm.assert_numpy_array_equal(expected, - idx.isin(values, level='foobar')) - - self.assertRaises(KeyError, idx.isin, values, level='xyzzy') - self.assertRaises(KeyError, idx.isin, values, level=np.nan) - - check_idx(Index(['qux', 'baz', 'foo', 'bar'])) - # Float64Index overrides isin, so must be checked separately - check_idx(Float64Index([1.0, 2.0, 3.0, 4.0])) - - def test_boolean_cmp(self): - values = [1, 2, 3, 4] - - idx = Index(values) - res = (idx == values) - - tm.assert_numpy_array_equal(res, np.array( - [True, True, True, True], dtype=bool)) - - def test_get_level_values(self): - result = self.strIndex.get_level_values(0) - self.assertTrue(result.equals(self.strIndex)) - - def test_slice_keep_name(self): - idx = Index(['a', 'b'], name='asdf') - self.assertEqual(idx.name, idx[1:].name) - - def test_join_self(self): - # instance attributes of the form self.Index - indices = 'unicode', 'str', 'date', 'int', 'float' - kinds = 'outer', 'inner', 'left', 'right' - for index_kind in indices: - res = getattr(self, '{0}Index'.format(index_kind)) - - for kind in kinds: - joined = res.join(res, how=kind) - self.assertIs(res, joined) - - def test_str_attribute(self): - # GH9068 - methods = ['strip', 'rstrip', 'lstrip'] - idx = Index([' jack', 'jill ', ' jesse ', 'frank']) - for method in methods: - expected = Index([getattr(str, method)(x) for x in idx.values]) - tm.assert_index_equal( - getattr(Index.str, method)(idx.str), expected) - - # create 
a few instances that are not able to use .str accessor - indices = [Index(range(5)), tm.makeDateIndex(10), - MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), - PeriodIndex(start='2000', end='2010', freq='A')] - for idx in indices: - with self.assertRaisesRegexp(AttributeError, - 'only use .str accessor'): - idx.str.repeat(2) - - idx = Index(['a b c', 'd e', 'f']) - expected = Index([['a', 'b', 'c'], ['d', 'e'], ['f']]) - tm.assert_index_equal(idx.str.split(), expected) - tm.assert_index_equal(idx.str.split(expand=False), expected) - - expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan), - ('f', np.nan, np.nan)]) - tm.assert_index_equal(idx.str.split(expand=True), expected) - - # test boolean case, should return np.array instead of boolean Index - idx = Index(['a1', 'a2', 'b1', 'b2']) - expected = np.array([True, True, False, False]) - tm.assert_numpy_array_equal(idx.str.startswith('a'), expected) - self.assertIsInstance(idx.str.startswith('a'), np.ndarray) - s = Series(range(4), index=idx) - expected = Series(range(2), index=['a1', 'a2']) - tm.assert_series_equal(s[s.index.str.startswith('a')], expected) - - def test_tab_completion(self): - # GH 9910 - idx = Index(list('abcd')) - self.assertTrue('str' in dir(idx)) - - idx = Index(range(4)) - self.assertTrue('str' not in dir(idx)) - - def test_indexing_doesnt_change_class(self): - idx = Index([1, 2, 3, 'a', 'b', 'c']) - - self.assertTrue(idx[1:3].identical(pd.Index([2, 3], dtype=np.object_))) - self.assertTrue(idx[[0, 1]].identical(pd.Index( - [1, 2], dtype=np.object_))) - - def test_outer_join_sort(self): - left_idx = Index(np.random.permutation(15)) - right_idx = tm.makeDateIndex(10) - - with tm.assert_produces_warning(RuntimeWarning): - joined = left_idx.join(right_idx, how='outer') - - # right_idx in this case because DatetimeIndex has join precedence over - # Int64Index - with tm.assert_produces_warning(RuntimeWarning): - expected = right_idx.astype(object).union(left_idx.astype(object)) 
- tm.assert_index_equal(joined, expected) - - def test_nan_first_take_datetime(self): - idx = Index([pd.NaT, Timestamp('20130101'), Timestamp('20130102')]) - res = idx.take([-1, 0, 1]) - exp = Index([idx[-1], idx[0], idx[1]]) - tm.assert_index_equal(res, exp) - - def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): - # GH6552 - idx = pd.Index([0, 1, 2]) - - dt_idx = pd.date_range('20130101', periods=3) - - idx.name = None - self.assertEqual(idx.reindex([])[0].name, None) - self.assertEqual(idx.reindex(np.array([]))[0].name, None) - self.assertEqual(idx.reindex(idx.tolist())[0].name, None) - self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None) - self.assertEqual(idx.reindex(idx.values)[0].name, None) - self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None) - - # Must preserve name even if dtype changes. - self.assertEqual(idx.reindex(dt_idx.values)[0].name, None) - self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None) - - idx.name = 'foobar' - self.assertEqual(idx.reindex([])[0].name, 'foobar') - self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar') - self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar') - - # Must preserve name even if dtype changes. 
- self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar') - self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar') - - def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): - # GH7774 - idx = pd.Index(list('abc')) - - def get_reindex_type(target): - return idx.reindex(target)[0].dtype.type - - self.assertEqual(get_reindex_type([]), np.object_) - self.assertEqual(get_reindex_type(np.array([])), np.object_) - self.assertEqual(get_reindex_type(np.array([], dtype=np.int64)), - np.object_) - - def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): - # GH7774 - idx = pd.Index(list('abc')) - - def get_reindex_type(target): - return idx.reindex(target)[0].dtype.type - - self.assertEqual(get_reindex_type(pd.Int64Index([])), np.int64) - self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float64) - self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64) - - reindexed = idx.reindex(pd.MultiIndex( - [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0] - self.assertEqual(reindexed.levels[0].dtype.type, np.int64) - self.assertEqual(reindexed.levels[1].dtype.type, np.float64) - - def test_groupby(self): - idx = Index(range(5)) - groups = idx.groupby(np.array([1, 1, 2, 2, 2])) - exp = {1: [0, 1], 2: [2, 3, 4]} - tm.assert_dict_equal(groups, exp) - - def test_equals_op_multiindex(self): - # GH9785 - # test comparisons of multiindex - from pandas.compat import StringIO - df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1]) - tm.assert_numpy_array_equal(df.index == df.index, - np.array([True, True])) - - mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)]) - tm.assert_numpy_array_equal(df.index == mi1, np.array([True, True])) - mi2 = MultiIndex.from_tuples([(1, 2), (4, 6)]) - tm.assert_numpy_array_equal(df.index == mi2, np.array([True, False])) - mi3 = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - df.index == mi3 - - index_a = 
Index(['foo', 'bar', 'baz']) - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - df.index == index_a - tm.assert_numpy_array_equal(index_a == mi3, - np.array([False, False, False])) - - def test_conversion_preserves_name(self): - # GH 10875 - i = pd.Index(['01:02:03', '01:02:04'], name='label') - self.assertEqual(i.name, pd.to_datetime(i).name) - self.assertEqual(i.name, pd.to_timedelta(i).name) - - def test_string_index_repr(self): - # py3/py2 repr can differ because of "u" prefix - # which also affects to displayed element size - - # short - idx = pd.Index(['a', 'bb', 'ccc']) - if PY3: - expected = u"""Index(['a', 'bb', 'ccc'], dtype='object')""" - self.assertEqual(repr(idx), expected) - else: - expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(unicode(idx), expected) - - # multiple lines - idx = pd.Index(['a', 'bb', 'ccc'] * 10) - if PY3: - expected = u"""\ -Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', - 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', - 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - dtype='object')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""\ -Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', - u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', - u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - dtype='object')""" - - self.assertEqual(unicode(idx), expected) - - # truncated - idx = pd.Index(['a', 'bb', 'ccc'] * 100) - if PY3: - expected = u"""\ -Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', - ... - 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - dtype='object', length=300)""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""\ -Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', - ... 
- u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - dtype='object', length=300)""" - - self.assertEqual(unicode(idx), expected) - - # short - idx = pd.Index([u'あ', u'いい', u'ううう']) - if PY3: - expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" - self.assertEqual(repr(idx), expected) - else: - expected = u"""\ -Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(unicode(idx), expected) - - # multiple lines - idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) - if PY3: - expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - dtype='object')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - dtype='object')""" - - self.assertEqual(unicode(idx), expected) - - # truncated - idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) - if PY3: - expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', - ... - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - dtype='object', length=300)""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - ... 
- u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - dtype='object', length=300)""" - - self.assertEqual(unicode(idx), expected) - - # Emable Unicode option ----------------------------------------- - with cf.option_context('display.unicode.east_asian_width', True): - - # short - idx = pd.Index([u'あ', u'いい', u'ううう']) - if PY3: - expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" - self.assertEqual(repr(idx), expected) - else: - expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(unicode(idx), expected) - - # multiple lines - idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) - if PY3: - expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう'], - dtype='object')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - dtype='object')""" - - self.assertEqual(unicode(idx), expected) - - # truncated - idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) - if PY3: - expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', - ... - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', - 'ううう'], - dtype='object', length=300)""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', - ... 
- u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう'], - dtype='object', length=300)""" - - self.assertEqual(unicode(idx), expected) - - -class TestCategoricalIndex(Base, tm.TestCase): - _holder = CategoricalIndex - - def setUp(self): - self.indices = dict(catIndex=tm.makeCategoricalIndex(100)) - self.setup_indices() - - def create_index(self, categories=None, ordered=False): - if categories is None: - categories = list('cab') - return CategoricalIndex( - list('aabbca'), categories=categories, ordered=ordered) - - def test_construction(self): - - ci = self.create_index(categories=list('abcd')) - categories = ci.categories - - result = Index(ci) - tm.assert_index_equal(result, ci, exact=True) - self.assertFalse(result.ordered) - - result = Index(ci.values) - tm.assert_index_equal(result, ci, exact=True) - self.assertFalse(result.ordered) - - # empty - result = CategoricalIndex(categories=categories) - self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes, np.array([], dtype='int8')) - self.assertFalse(result.ordered) - - # passing categories - result = CategoricalIndex(list('aabbca'), categories=categories) - self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes, np.array( - [0, 0, 1, 1, 2, 0], dtype='int8')) - - c = pd.Categorical(list('aabbca')) - result = CategoricalIndex(c) - self.assertTrue(result.categories.equals(Index(list('abc')))) - tm.assert_numpy_array_equal(result.codes, np.array( - [0, 0, 1, 1, 2, 0], dtype='int8')) - self.assertFalse(result.ordered) - - result = CategoricalIndex(c, categories=categories) - self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes, np.array( - [0, 0, 1, 1, 2, 0], dtype='int8')) - self.assertFalse(result.ordered) - - ci = CategoricalIndex(c, categories=list('abcd')) - result = CategoricalIndex(ci) - 
self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes, np.array( - [0, 0, 1, 1, 2, 0], dtype='int8')) - self.assertFalse(result.ordered) - - result = CategoricalIndex(ci, categories=list('ab')) - self.assertTrue(result.categories.equals(Index(list('ab')))) - tm.assert_numpy_array_equal(result.codes, np.array( - [0, 0, 1, 1, -1, 0], dtype='int8')) - self.assertFalse(result.ordered) - - result = CategoricalIndex(ci, categories=list('ab'), ordered=True) - self.assertTrue(result.categories.equals(Index(list('ab')))) - tm.assert_numpy_array_equal(result.codes, np.array( - [0, 0, 1, 1, -1, 0], dtype='int8')) - self.assertTrue(result.ordered) - - # turn me to an Index - result = Index(np.array(ci)) - self.assertIsInstance(result, Index) - self.assertNotIsInstance(result, CategoricalIndex) - - def test_construction_with_dtype(self): - - # specify dtype - ci = self.create_index(categories=list('abc')) - - result = Index(np.array(ci), dtype='category') - tm.assert_index_equal(result, ci, exact=True) - - result = Index(np.array(ci).tolist(), dtype='category') - tm.assert_index_equal(result, ci, exact=True) - - # these are generally only equal when the categories are reordered - ci = self.create_index() - - result = Index( - np.array(ci), dtype='category').reorder_categories(ci.categories) - tm.assert_index_equal(result, ci, exact=True) - - # make sure indexes are handled - expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2], - ordered=True) - idx = Index(range(3)) - result = CategoricalIndex(idx, categories=idx, ordered=True) - tm.assert_index_equal(result, expected, exact=True) - - def test_disallow_set_ops(self): - - # GH 10039 - # set ops (+/-) raise TypeError - idx = pd.Index(pd.Categorical(['a', 'b'])) - - self.assertRaises(TypeError, lambda: idx - idx) - self.assertRaises(TypeError, lambda: idx + idx) - self.assertRaises(TypeError, lambda: idx - ['a', 'b']) - self.assertRaises(TypeError, lambda: idx + ['a', 'b']) 
- self.assertRaises(TypeError, lambda: ['a', 'b'] - idx) - self.assertRaises(TypeError, lambda: ['a', 'b'] + idx) - - def test_method_delegation(self): - - ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) - result = ci.set_categories(list('cab')) - tm.assert_index_equal(result, CategoricalIndex( - list('aabbca'), categories=list('cab'))) - - ci = CategoricalIndex(list('aabbca'), categories=list('cab')) - result = ci.rename_categories(list('efg')) - tm.assert_index_equal(result, CategoricalIndex( - list('ffggef'), categories=list('efg'))) - - ci = CategoricalIndex(list('aabbca'), categories=list('cab')) - result = ci.add_categories(['d']) - tm.assert_index_equal(result, CategoricalIndex( - list('aabbca'), categories=list('cabd'))) - - ci = CategoricalIndex(list('aabbca'), categories=list('cab')) - result = ci.remove_categories(['c']) - tm.assert_index_equal(result, CategoricalIndex( - list('aabb') + [np.nan] + ['a'], categories=list('ab'))) - - ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) - result = ci.as_unordered() - tm.assert_index_equal(result, ci) - - ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) - result = ci.as_ordered() - tm.assert_index_equal(result, CategoricalIndex( - list('aabbca'), categories=list('cabdef'), ordered=True)) - - # invalid - self.assertRaises(ValueError, lambda: ci.set_categories( - list('cab'), inplace=True)) - - def test_contains(self): - - ci = self.create_index(categories=list('cabdef')) - - self.assertTrue('a' in ci) - self.assertTrue('z' not in ci) - self.assertTrue('e' not in ci) - self.assertTrue(np.nan not in ci) - - # assert codes NOT in index - self.assertFalse(0 in ci) - self.assertFalse(1 in ci) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ci = CategoricalIndex( - list('aabbca'), categories=list('cabdef') + [np.nan]) - self.assertFalse(np.nan in ci) - - ci = CategoricalIndex( - list('aabbca') + [np.nan], categories=list('cabdef')) - 
self.assertTrue(np.nan in ci) - - def test_min_max(self): - - ci = self.create_index(ordered=False) - self.assertRaises(TypeError, lambda: ci.min()) - self.assertRaises(TypeError, lambda: ci.max()) - - ci = self.create_index(ordered=True) - - self.assertEqual(ci.min(), 'c') - self.assertEqual(ci.max(), 'b') - - def test_append(self): - - ci = self.create_index() - categories = ci.categories - - # append cats with the same categories - result = ci[:3].append(ci[3:]) - tm.assert_index_equal(result, ci, exact=True) - - foos = [ci[:1], ci[1:3], ci[3:]] - result = foos[0].append(foos[1:]) - tm.assert_index_equal(result, ci, exact=True) - - # empty - result = ci.append([]) - tm.assert_index_equal(result, ci, exact=True) - - # appending with different categories or reoreded is not ok - self.assertRaises( - TypeError, - lambda: ci.append(ci.values.set_categories(list('abcd')))) - self.assertRaises( - TypeError, - lambda: ci.append(ci.values.reorder_categories(list('abc')))) - - # with objects - result = ci.append(['c', 'a']) - expected = CategoricalIndex(list('aabbcaca'), categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - # invalid objects - self.assertRaises(TypeError, lambda: ci.append(['a', 'd'])) - - def test_insert(self): - - ci = self.create_index() - categories = ci.categories - - # test 0th element - result = ci.insert(0, 'a') - expected = CategoricalIndex(list('aaabbca'), categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - # test Nth element that follows Python list behavior - result = ci.insert(-1, 'a') - expected = CategoricalIndex(list('aabbcaa'), categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - # test empty - result = CategoricalIndex(categories=categories).insert(0, 'a') - expected = CategoricalIndex(['a'], categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - # invalid - self.assertRaises(TypeError, lambda: ci.insert(0, 'd')) - - def 
test_delete(self): - - ci = self.create_index() - categories = ci.categories - - result = ci.delete(0) - expected = CategoricalIndex(list('abbca'), categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - result = ci.delete(-1) - expected = CategoricalIndex(list('aabbc'), categories=categories) - tm.assert_index_equal(result, expected, exact=True) - - with tm.assertRaises((IndexError, ValueError)): - # either depeidnig on numpy version - result = ci.delete(10) - - def test_astype(self): - - ci = self.create_index() - result = ci.astype('category') - tm.assert_index_equal(result, ci, exact=True) - - result = ci.astype(object) - self.assertTrue(result.equals(Index(np.array(ci)))) - - # this IS equal, but not the same class - self.assertTrue(result.equals(ci)) - self.assertIsInstance(result, Index) - self.assertNotIsInstance(result, CategoricalIndex) - - def test_reindex_base(self): - - # determined by cat ordering - idx = self.create_index() - expected = np.array([4, 0, 1, 5, 2, 3]) - - actual = idx.get_indexer(idx) - tm.assert_numpy_array_equal(expected, actual) - - with tm.assertRaisesRegexp(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') - - def test_reindexing(self): - - ci = self.create_index() - oidx = Index(np.array(ci)) - - for n in [1, 2, 5, len(ci)]: - finder = oidx[np.random.randint(0, len(ci), size=n)] - expected = oidx.get_indexer_non_unique(finder)[0] - - actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected, actual) - - def test_reindex_dtype(self): - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c' - ]) - tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex( - Categorical(['a', 'c'])) - tm.assert_index_equal(res, CategoricalIndex( - ['a', 'a', 'c'], categories=['a', 'c']), exact=True) - tm.assert_numpy_array_equal(indexer, 
np.array([0, 3, 2])) - - res, indexer = CategoricalIndex( - ['a', 'b', 'c', 'a' - ], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c']) - tm.assert_index_equal(res, Index( - ['a', 'a', 'c'], dtype='object'), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - - res, indexer = CategoricalIndex( - ['a', 'b', 'c', 'a'], - categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c'])) - tm.assert_index_equal(res, CategoricalIndex( - ['a', 'a', 'c'], categories=['a', 'c']), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - - def test_duplicates(self): - - idx = CategoricalIndex([0, 0, 0], name='foo') - self.assertFalse(idx.is_unique) - self.assertTrue(idx.has_duplicates) - - expected = CategoricalIndex([0], name='foo') - self.assert_index_equal(idx.drop_duplicates(), expected) - - def test_get_indexer(self): - - idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc')) - idx2 = CategoricalIndex(list('abf')) - - for indexer in [idx2, list('abf'), Index(list('abf'))]: - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, [0, 1, 2, -1]) - - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='pad')) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='backfill')) - self.assertRaises(NotImplementedError, - lambda: idx2.get_indexer(idx1, method='nearest')) - - def test_repr_roundtrip(self): - - ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) - str(ci) - tm.assert_index_equal(eval(repr(ci)), ci, exact=True) - - # formatting - if PY3: - str(ci) - else: - compat.text_type(ci) - - # long format - # this is not reprable - ci = CategoricalIndex(np.random.randint(0, 5, size=100)) - if PY3: - str(ci) - else: - compat.text_type(ci) - - def test_isin(self): - - ci = CategoricalIndex( - list('aabca') + [np.nan], categories=['c', 'a', 'b']) - tm.assert_numpy_array_equal( - ci.isin(['c']), - np.array([False, False, False, True, False, False])) - 
tm.assert_numpy_array_equal( - ci.isin(['c', 'a', 'b']), np.array([True] * 5 + [False])) - tm.assert_numpy_array_equal( - ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) - - # mismatched categorical -> coerced to ndarray so doesn't matter - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * - 6)) - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('defghi'))), - np.array([False] * 5 + [True])) - - def test_identical(self): - - ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) - ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], - ordered=True) - self.assertTrue(ci1.identical(ci1)) - self.assertTrue(ci1.identical(ci1.copy())) - self.assertFalse(ci1.identical(ci2)) - - def test_equals(self): - - ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) - ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], - ordered=True) - - self.assertTrue(ci1.equals(ci1)) - self.assertFalse(ci1.equals(ci2)) - self.assertTrue(ci1.equals(ci1.astype(object))) - self.assertTrue(ci1.astype(object).equals(ci1)) - - self.assertTrue((ci1 == ci1).all()) - self.assertFalse((ci1 != ci1).all()) - self.assertFalse((ci1 > ci1).all()) - self.assertFalse((ci1 < ci1).all()) - self.assertTrue((ci1 <= ci1).all()) - self.assertTrue((ci1 >= ci1).all()) - - self.assertFalse((ci1 == 1).all()) - self.assertTrue((ci1 == Index(['a', 'b'])).all()) - self.assertTrue((ci1 == ci1.values).all()) - - # invalid comparisons - with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - ci1 == Index(['a', 'b', 'c']) - self.assertRaises(TypeError, lambda: ci1 == ci2) - self.assertRaises( - TypeError, lambda: ci1 == Categorical(ci1.values, ordered=False)) - self.assertRaises( - TypeError, - lambda: ci1 == Categorical(ci1.values, categories=list('abc'))) - - # tests - # make sure that we are testing for category inclusion properly - self.assertTrue(CategoricalIndex( - list('aabca'), 
categories=['c', 'a', 'b']).equals(list('aabca'))) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(CategoricalIndex( - list('aabca'), categories=['c', 'a', 'b', np.nan]).equals(list( - 'aabca'))) - - self.assertFalse(CategoricalIndex( - list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( - 'aabca'))) - self.assertTrue(CategoricalIndex( - list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( - 'aabca') + [np.nan])) - - def test_string_categorical_index_repr(self): - # short - idx = pd.CategoricalIndex(['a', 'bb', 'ccc']) - if PY3: - expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" - self.assertEqual(unicode(idx), expected) - - # multiple lines - idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 10) - if PY3: - expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', - 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', - 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', - u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', - u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', - u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" - - self.assertEqual(unicode(idx), expected) - - # truncated - idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 100) - if PY3: - expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', - ... 
- 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], - categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', - u'ccc', u'a', - ... - u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', - u'bb', u'ccc'], - categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" - - self.assertEqual(unicode(idx), expected) - - # larger categories - idx = pd.CategoricalIndex(list('abcdefghijklmmo')) - if PY3: - expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', - 'm', 'm', 'o'], - categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', - u'k', u'l', u'm', u'm', u'o'], - categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')""" - - self.assertEqual(unicode(idx), expected) - - # short - idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) - if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" - self.assertEqual(unicode(idx), expected) - - # multiple lines - idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) - if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', - 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" - - self.assertEqual(repr(idx), expected) - else: - expected = 
u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', - u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" - - self.assertEqual(unicode(idx), expected) - - # truncated - idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) - if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', - ... - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', - ... - u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" - - self.assertEqual(unicode(idx), expected) - - # larger categories - idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) - if PY3: - expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', - 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', - u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" - - self.assertEqual(unicode(idx), expected) - - # Emable Unicode option ----------------------------------------- - with cf.option_context('display.unicode.east_asian_width', True): - - # short - idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう']) - if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, 
dtype='category')""" - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" - self.assertEqual(unicode(idx), expected) - - # multiple lines - idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10) - if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" - - self.assertEqual(unicode(idx), expected) - - # truncated - idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100) - if PY3: - expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', - 'ううう', 'あ', - ... - 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', - 'あ', 'いい', 'ううう'], - categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', - u'いい', u'ううう', u'あ', - ... 
- u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', - u'ううう', u'あ', u'いい', u'ううう'], - categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" - - self.assertEqual(unicode(idx), expected) - - # larger categories - idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ')) - if PY3: - expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', - 'さ', 'し', 'す', 'せ', 'そ'], - categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" - - self.assertEqual(repr(idx), expected) - else: - expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', - u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], - categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" - - self.assertEqual(unicode(idx), expected) - - def test_fillna_categorical(self): - # GH 11343 - idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x') - # fill by value in categories - exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x') - self.assert_index_equal(idx.fillna(1.0), exp) - - # fill by value not in categories raises ValueError - with tm.assertRaisesRegexp(ValueError, - 'fill value must be in categories'): - idx.fillna(2.0) - - -class Numeric(Base): - - def test_numeric_compat(self): - - idx = self.create_index() - didx = idx * idx - - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - # in general not true for RangeIndex - if not isinstance(idx, RangeIndex): - result = idx * idx - tm.assert_index_equal(result, idx ** 2) - - # truediv under PY3 - result = idx / 1 - expected = idx - if PY3: - expected = expected.astype('float64') - tm.assert_index_equal(result, expected) - - result = idx / 2 - if PY3: - expected = expected.astype('float64') - expected = Index(idx.values / 2) - tm.assert_index_equal(result, expected) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, 
dtype='int64') - tm.assert_index_equal(result, idx * 5) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - expected = Float64Index(np.arange(5, dtype='float64') * - (np.arange(5, dtype='float64') + 0.1)) - tm.assert_index_equal(result, expected) - - # invalid - self.assertRaises(TypeError, - lambda: idx * date_range('20130101', periods=5)) - self.assertRaises(ValueError, lambda: idx * idx[0:3]) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) - - def test_explicit_conversions(self): - - # GH 8608 - # add/sub are overriden explicity for Float/Int Index - idx = self._holder(np.arange(5, dtype='int64')) - - # float conversions - arr = np.arange(5, dtype='int64') * 3.2 - expected = Float64Index(arr) - fidx = idx * 3.2 - tm.assert_index_equal(fidx, expected) - fidx = 3.2 * idx - tm.assert_index_equal(fidx, expected) - - # interops with numpy arrays - expected = Float64Index(arr) - a = np.zeros(5, dtype='float64') - result = fidx - a - tm.assert_index_equal(result, expected) - - expected = Float64Index(-arr) - a = np.zeros(5, dtype='float64') - result = a - fidx - tm.assert_index_equal(result, expected) - - def test_ufunc_compat(self): - idx = self._holder(np.arange(5, dtype='int64')) - result = np.sin(idx) - expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) - tm.assert_index_equal(result, expected) - - def test_index_groupby(self): - int_idx = Index(range(6)) - float_idx = Index(np.arange(0, 0.6, 0.1)) - obj_idx = Index('A B C D E F'.split()) - dt_idx = pd.date_range('2013-01-01', freq='M', periods=6) - - for idx in [int_idx, float_idx, obj_idx, dt_idx]: - to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1]) - self.assertEqual(idx.groupby(to_groupby), - {1.0: [idx[0], idx[5]], 2.0: [idx[1], idx[4]]}) - - to_groupby = Index([datetime(2011, 11, 1), - 
datetime(2011, 12, 1), - pd.NaT, - pd.NaT, - datetime(2011, 12, 1), - datetime(2011, 11, 1)], - tz='UTC').values - - ex_keys = pd.tslib.datetime_to_datetime64(np.array([Timestamp( - '2011-11-01'), Timestamp('2011-12-01')])) - expected = {ex_keys[0][0]: [idx[0], idx[5]], - ex_keys[0][1]: [idx[1], idx[4]]} - self.assertEqual(idx.groupby(to_groupby), expected) - - def test_modulo(self): - # GH 9244 - index = self.create_index() - expected = Index(index.values % 2) - self.assert_index_equal(index % 2, expected) - - -class TestFloat64Index(Numeric, tm.TestCase): - _holder = Float64Index - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), - float=Float64Index(np.arange(5) * 2.5)) - self.setup_indices() - - def create_index(self): - return Float64Index(np.arange(5, dtype='float64')) - - def test_repr_roundtrip(self): - for ind in (self.mixed, self.float): - tm.assert_index_equal(eval(repr(ind)), ind) - - def check_is_index(self, i): - self.assertIsInstance(i, Index) - self.assertNotIsInstance(i, Float64Index) - - def check_coerce(self, a, b, is_float_index=True): - self.assertTrue(a.equals(b)) - if is_float_index: - self.assertIsInstance(b, Float64Index) - else: - self.check_is_index(b) - - def test_constructor(self): - - # explicit construction - index = Float64Index([1, 2, 3, 4, 5]) - self.assertIsInstance(index, Float64Index) - self.assertTrue((index.values == np.array( - [1, 2, 3, 4, 5], dtype='float64')).all()) - index = Float64Index(np.array([1, 2, 3, 4, 5])) - self.assertIsInstance(index, Float64Index) - index = Float64Index([1., 2, 3, 4, 5]) - self.assertIsInstance(index, Float64Index) - index = Float64Index(np.array([1., 2, 3, 4, 5])) - self.assertIsInstance(index, Float64Index) - self.assertEqual(index.dtype, float) - - index = Float64Index(np.array([1., 2, 3, 4, 5]), dtype=np.float32) - self.assertIsInstance(index, Float64Index) - self.assertEqual(index.dtype, np.float64) - - index = 
Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) - self.assertIsInstance(index, Float64Index) - self.assertEqual(index.dtype, np.float64) - - # nan handling - result = Float64Index([np.nan, np.nan]) - self.assertTrue(pd.isnull(result.values).all()) - result = Float64Index(np.array([np.nan])) - self.assertTrue(pd.isnull(result.values).all()) - result = Index(np.array([np.nan])) - self.assertTrue(pd.isnull(result.values).all()) - - def test_constructor_invalid(self): - - # invalid - self.assertRaises(TypeError, Float64Index, 0.) - self.assertRaises(TypeError, Float64Index, ['a', 'b', 0.]) - self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')]) - - def test_constructor_coerce(self): - - self.check_coerce(self.mixed, Index([1.5, 2, 3, 4, 5])) - self.check_coerce(self.float, Index(np.arange(5) * 2.5)) - self.check_coerce(self.float, Index(np.array( - np.arange(5) * 2.5, dtype=object))) - - def test_constructor_explicit(self): - - # these don't auto convert - self.check_coerce(self.float, - Index((np.arange(5) * 2.5), dtype=object), - is_float_index=False) - self.check_coerce(self.mixed, Index( - [1.5, 2, 3, 4, 5], dtype=object), is_float_index=False) - - def test_astype(self): - - result = self.float.astype(object) - self.assertTrue(result.equals(self.float)) - self.assertTrue(self.float.equals(result)) - self.check_is_index(result) - - i = self.mixed.copy() - i.name = 'foo' - result = i.astype(object) - self.assertTrue(result.equals(i)) - self.assertTrue(i.equals(result)) - self.check_is_index(result) - - def test_equals(self): - - i = Float64Index([1.0, 2.0]) - self.assertTrue(i.equals(i)) - self.assertTrue(i.identical(i)) - - i2 = Float64Index([1.0, 2.0]) - self.assertTrue(i.equals(i2)) - - i = Float64Index([1.0, np.nan]) - self.assertTrue(i.equals(i)) - self.assertTrue(i.identical(i)) - - i2 = Float64Index([1.0, np.nan]) - self.assertTrue(i.equals(i2)) - - def test_get_indexer(self): - idx = Float64Index([0.0, 1.0, 2.0]) - 
tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) - - target = [-0.1, 0.5, 1.1] - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest'), [0, 1, 1]) - - def test_get_loc(self): - idx = Float64Index([0.0, 1.0, 2.0]) - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(1, method), 1) - if method is not None: - self.assertEqual(idx.get_loc(1, method, tolerance=0), 1) - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc(1.1, method), loc) - self.assertEqual(idx.get_loc(1.1, method, tolerance=0.9), loc) - - self.assertRaises(KeyError, idx.get_loc, 'foo') - self.assertRaises(KeyError, idx.get_loc, 1.5) - self.assertRaises(KeyError, idx.get_loc, 1.5, method='pad', - tolerance=0.1) - - with tm.assertRaisesRegexp(ValueError, 'must be numeric'): - idx.get_loc(1.4, method='nearest', tolerance='foo') - - def test_get_loc_na(self): - idx = Float64Index([np.nan, 1, 2]) - self.assertEqual(idx.get_loc(1), 1) - self.assertEqual(idx.get_loc(np.nan), 0) - - idx = Float64Index([np.nan, 1, np.nan]) - self.assertEqual(idx.get_loc(1), 1) - - # representable by slice [0:2:2] - # self.assertRaises(KeyError, idx.slice_locs, np.nan) - sliced = idx.slice_locs(np.nan) - self.assertTrue(isinstance(sliced, tuple)) - self.assertEqual(sliced, (0, 3)) - - # not representable by slice - idx = Float64Index([np.nan, 1, np.nan, np.nan]) - self.assertEqual(idx.get_loc(1), 1) - self.assertRaises(KeyError, idx.slice_locs, np.nan) - - def test_contains_nans(self): - i = Float64Index([1.0, 2.0, np.nan]) - self.assertTrue(np.nan in i) - - def test_contains_not_nans(self): - i = Float64Index([1.0, 2.0, np.nan]) - self.assertTrue(1.0 in i) - - def test_doesnt_contain_all_the_things(self): - i = Float64Index([np.nan]) - self.assertFalse(i.isin([0]).item()) - 
self.assertFalse(i.isin([1]).item()) - self.assertTrue(i.isin([np.nan]).item()) - - def test_nan_multiple_containment(self): - i = Float64Index([1.0, np.nan]) - tm.assert_numpy_array_equal(i.isin([1.0]), np.array([True, False])) - tm.assert_numpy_array_equal(i.isin([2.0, np.pi]), - np.array([False, False])) - tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, True])) - tm.assert_numpy_array_equal(i.isin([1.0, np.nan]), - np.array([True, True])) - i = Float64Index([1.0, 2.0]) - tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, False])) - - def test_astype_from_object(self): - index = Index([1.0, np.nan, 0.2], dtype='object') - result = index.astype(float) - expected = Float64Index([1.0, np.nan, 0.2]) - tm.assert_equal(result.dtype, expected.dtype) - tm.assert_index_equal(result, expected) - - def test_fillna_float64(self): - # GH 11343 - idx = Index([1.0, np.nan, 3.0], dtype=float, name='x') - # can't downcast - exp = Index([1.0, 0.1, 3.0], name='x') - self.assert_index_equal(idx.fillna(0.1), exp) - - # downcast - exp = Float64Index([1.0, 2.0, 3.0], name='x') - self.assert_index_equal(idx.fillna(2), exp) - - # object - exp = Index([1.0, 'obj', 3.0], name='x') - self.assert_index_equal(idx.fillna('obj'), exp) - - -class TestInt64Index(Numeric, tm.TestCase): - _holder = Int64Index - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) - self.setup_indices() - - def create_index(self): - return Int64Index(np.arange(5, dtype='int64')) - - def test_too_many_names(self): - def testit(): - self.index.names = ["roger", "harold"] - - assertRaisesRegexp(ValueError, "^Length", testit) - - def test_constructor(self): - # pass list, coerce fine - index = Int64Index([-5, 0, 1, 2]) - expected = np.array([-5, 0, 1, 2], dtype=np.int64) - tm.assert_numpy_array_equal(index, expected) - - # from iterable - index = Int64Index(iter([-5, 0, 1, 2])) - tm.assert_numpy_array_equal(index, expected) - - # 
scalar raise Exception - self.assertRaises(TypeError, Int64Index, 5) - - # copy - arr = self.index.values - new_index = Int64Index(arr, copy=True) - tm.assert_numpy_array_equal(new_index, self.index) - val = arr[0] + 3000 - # this should not change index - arr[0] = val - self.assertNotEqual(new_index[0], val) - - def test_constructor_corner(self): - arr = np.array([1, 2, 3, 4], dtype=object) - index = Int64Index(arr) - self.assertEqual(index.values.dtype, np.int64) - self.assertTrue(index.equals(arr)) - - # preventing casting - arr = np.array([1, '2', 3, '4'], dtype=object) - with tm.assertRaisesRegexp(TypeError, 'casting'): - Int64Index(arr) - - arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with tm.assertRaisesRegexp(TypeError, 'casting'): - Int64Index(arr_with_floats) - - def test_copy(self): - i = Int64Index([], name='Foo') - i_copy = i.copy() - self.assertEqual(i_copy.name, 'Foo') - - def test_view(self): - super(TestInt64Index, self).test_view() - - i = Int64Index([], name='Foo') - i_view = i.view() - self.assertEqual(i_view.name, 'Foo') - - i_view = i.view('i8') - tm.assert_index_equal(i, Int64Index(i_view, name='Foo')) - - i_view = i.view(Int64Index) - tm.assert_index_equal(i, Int64Index(i_view, name='Foo')) - - def test_coerce_list(self): - # coerce things - arr = Index([1, 2, 3, 4]) - tm.assertIsInstance(arr, Int64Index) - - # but not if explicit dtype passed - arr = Index([1, 2, 3, 4], dtype=object) - tm.assertIsInstance(arr, Index) - - def test_dtype(self): - self.assertEqual(self.index.dtype, np.int64) - - def test_is_monotonic(self): - self.assertTrue(self.index.is_monotonic) - self.assertTrue(self.index.is_monotonic_increasing) - self.assertFalse(self.index.is_monotonic_decreasing) - - index = Int64Index([4, 3, 2, 1]) - self.assertFalse(index.is_monotonic) - self.assertTrue(index.is_monotonic_decreasing) - - index = Int64Index([1]) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - 
self.assertTrue(index.is_monotonic_decreasing) - - def test_is_monotonic_na(self): - examples = [Index([np.nan]), - Index([np.nan, 1]), - Index([1, 2, np.nan]), - Index(['a', 'b', np.nan]), - pd.to_datetime(['NaT']), - pd.to_datetime(['NaT', '2000-01-01']), - pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), - pd.to_timedelta(['1 day', 'NaT']), ] - for index in examples: - self.assertFalse(index.is_monotonic_increasing) - self.assertFalse(index.is_monotonic_decreasing) - - def test_equals(self): - same_values = Index(self.index, dtype=object) - self.assertTrue(self.index.equals(same_values)) - self.assertTrue(same_values.equals(self.index)) - - def test_logical_compat(self): - idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) - - def test_identical(self): - i = Index(self.index.copy()) - self.assertTrue(i.identical(self.index)) - - same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) - - i = self.index.copy(dtype=object) - i = i.rename('foo') - same_values = Index(i, dtype=object) - self.assertTrue(same_values.identical(i)) - - self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', dtype=object).identical( - i)) - - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) - - def test_get_indexer(self): - target = Int64Index(np.arange(10)) - indexer = self.index.get_indexer(target) - expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) - tm.assert_numpy_array_equal(indexer, expected) - - def test_get_indexer_pad(self): - target = Int64Index(np.arange(10)) - indexer = self.index.get_indexer(target, method='pad') - expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) - tm.assert_numpy_array_equal(indexer, expected) - - def test_get_indexer_backfill(self): - target = Int64Index(np.arange(10)) - indexer = self.index.get_indexer(target, method='backfill') - 
expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) - tm.assert_numpy_array_equal(indexer, expected) - - def test_join_outer(self): - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - # guarantee of sortedness - res, lidx, ridx = self.index.join(other, how='outer', - return_indexers=True) - noidx_res = self.index.join(other, how='outer') - self.assertTrue(res.equals(noidx_res)) - - eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) - elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], - dtype=np.int64) - eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], - dtype=np.int64) - - tm.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='outer', - return_indexers=True) - noidx_res = self.index.join(other_mono, how='outer') - self.assertTrue(res.equals(noidx_res)) - - eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], - dtype=np.int64) - tm.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_inner(self): - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = self.index.join(other, how='inner', - return_indexers=True) - - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = Int64Index([2, 12]) - elidx = np.array([1, 6]) - eridx = np.array([4, 1]) - - tm.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, 
how='inner', - return_indexers=True) - - res2 = self.index.intersection(other_mono) - self.assertTrue(res.equals(res2)) - - eridx = np.array([1, 4]) - tm.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_left(self): - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = self.index.join(other, how='left', - return_indexers=True) - eres = self.index - eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], - dtype=np.int64) - - tm.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - self.assertIsNone(lidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # monotonic - res, lidx, ridx = self.index.join(other_mono, how='left', - return_indexers=True) - eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], - dtype=np.int64) - tm.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - self.assertIsNone(lidx) - tm.assert_numpy_array_equal(ridx, eridx) - - # non-unique - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = idx2.join(idx, how='left', return_indexers=True) - eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - eridx = np.array([0, 1, 2, 3, -1, -1]) - elidx = np.array([0, 0, 1, 2, 3, 4]) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_right(self): - other = Int64Index([7, 12, 25, 1, 2, 5]) - other_mono = Int64Index([1, 2, 5, 7, 12, 25]) - - # not monotonic - res, lidx, ridx = self.index.join(other, how='right', - return_indexers=True) - eres = other - elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int64) - - tm.assertIsInstance(other, Int64Index) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) - - # monotonic - res, lidx, ridx = 
self.index.join(other_mono, how='right', - return_indexers=True) - eres = other_mono - elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.int64) - tm.assertIsInstance(other, Int64Index) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) - - # non-unique - idx = Index([1, 1, 2, 5]) - idx2 = Index([1, 2, 5, 7, 9]) - res, lidx, ridx = idx.join(idx2, how='right', return_indexers=True) - eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 - elidx = np.array([0, 1, 2, 3, -1, -1]) - eridx = np.array([0, 0, 1, 2, 3, 4]) - self.assertTrue(res.equals(eres)) - tm.assert_numpy_array_equal(lidx, elidx) - tm.assert_numpy_array_equal(ridx, eridx) - - def test_join_non_int_index(self): - other = Index([3, 6, 7, 8, 10], dtype=object) - - outer = self.index.join(other, how='outer') - outer2 = other.join(self.index, how='outer') - expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, - 16, 18], dtype=object) - self.assertTrue(outer.equals(outer2)) - self.assertTrue(outer.equals(expected)) - - inner = self.index.join(other, how='inner') - inner2 = other.join(self.index, how='inner') - expected = Index([6, 8, 10], dtype=object) - self.assertTrue(inner.equals(inner2)) - self.assertTrue(inner.equals(expected)) - - left = self.index.join(other, how='left') - self.assertTrue(left.equals(self.index)) - - left2 = other.join(self.index, how='left') - self.assertTrue(left2.equals(other)) - - right = self.index.join(other, how='right') - self.assertTrue(right.equals(other)) - - right2 = other.join(self.index, how='right') - self.assertTrue(right2.equals(self.index)) - - def test_join_non_unique(self): - left = Index([4, 4, 3, 3]) - - joined, lidx, ridx = left.join(left, return_indexers=True) - - exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) - self.assertTrue(joined.equals(exp_joined)) - - exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.int64) - tm.assert_numpy_array_equal(lidx, exp_lidx) - - exp_ridx = np.array([2, 3, 2, 3, 
0, 1, 0, 1], dtype=np.int64) - tm.assert_numpy_array_equal(ridx, exp_ridx) - - def test_join_self(self): - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) - - def test_intersection(self): - other = Index([1, 2, 3, 4, 5]) - result = self.index.intersection(other) - expected = np.sort(np.intersect1d(self.index.values, other.values)) - tm.assert_numpy_array_equal(result, expected) - - result = other.intersection(self.index) - expected = np.sort(np.asarray(np.intersect1d(self.index.values, - other.values))) - tm.assert_numpy_array_equal(result, expected) - - def test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - i1 = Index(dt_dates, dtype=object) - i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) - - self.assertEqual(len(res), 0) - - def test_union_noncomparable(self): - from datetime import datetime, timedelta - # corner case, non-Int64Index - now = datetime.now() - other = Index([now + timedelta(i) for i in range(4)], dtype=object) - result = self.index.union(other) - expected = np.concatenate((self.index, other)) - tm.assert_numpy_array_equal(result, expected) - - result = other.union(self.index) - expected = np.concatenate((other, self.index)) - tm.assert_numpy_array_equal(result, expected) - - def test_cant_or_shouldnt_cast(self): - # can't - data = ['foo', 'bar', 'baz'] - self.assertRaises(TypeError, Int64Index, data) - - # shouldn't - data = ['0', '1', '2'] - self.assertRaises(TypeError, Int64Index, data) - - def test_view_Index(self): - self.index.view(Index) - - def test_prevent_casting(self): - result = self.index.astype('O') - self.assertEqual(result.dtype, np.object_) - - def test_take_preserve_name(self): - index = Int64Index([1, 2, 3, 4], name='foo') - taken = index.take([3, 0, 1]) - self.assertEqual(index.name, taken.name) - - def test_int_name_format(self): - index = Index(['a', 'b', 'c'], name=0) - 
s = Series(lrange(3), index) - df = DataFrame(lrange(3), index=index) - repr(s) - repr(df) - - def test_print_unicode_columns(self): - df = pd.DataFrame({u("\u05d0"): [1, 2, 3], - "\u05d1": [4, 5, 6], - "c": [7, 8, 9]}) - repr(df.columns) # should not raise UnicodeDecodeError - - def test_repr_summary(self): - with cf.option_context('display.max_seq_items', 10): - r = repr(pd.Index(np.arange(1000))) - self.assertTrue(len(r) < 200) - self.assertTrue("..." in r) - - def test_repr_roundtrip(self): - tm.assert_index_equal(eval(repr(self.index)), self.index) - - def test_unicode_string_with_unicode(self): - idx = Index(lrange(1000)) - - if PY3: - str(idx) - else: - compat.text_type(idx) - - def test_bytestring_with_unicode(self): - idx = Index(lrange(1000)) - if PY3: - bytes(idx) - else: - str(idx) - - def test_slice_keep_name(self): - idx = Int64Index([1, 2], name='asdf') - self.assertEqual(idx.name, idx[1:].name) - - def test_ufunc_coercions(self): - idx = Int64Index([1, 2, 3, 4, 5], name='x') - - result = np.sqrt(idx) - tm.assertIsInstance(result, Float64Index) - exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') - tm.assert_index_equal(result, exp) - - result = np.divide(idx, 2.) - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') - tm.assert_index_equal(result, exp) - - # _evaluate_numeric_binop - result = idx + 2. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([3., 4., 5., 6., 7.], name='x') - tm.assert_index_equal(result, exp) - - result = idx - 2. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([-1., 0., 1., 2., 3.], name='x') - tm.assert_index_equal(result, exp) - - result = idx * 1. - tm.assertIsInstance(result, Float64Index) - exp = Float64Index([1., 2., 3., 4., 5.], name='x') - tm.assert_index_equal(result, exp) - - result = idx / 2. 
- tm.assertIsInstance(result, Float64Index) - exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') - tm.assert_index_equal(result, exp) - - -class TestRangeIndex(Numeric, tm.TestCase): - _holder = RangeIndex - _compat_props = ['shape', 'ndim', 'size', 'itemsize'] - - def setUp(self): - self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) - self.setup_indices() - - def create_index(self): - return RangeIndex(5) - - def test_binops(self): - ops = [operator.add, operator.sub, operator.mul, operator.floordiv, - operator.truediv, pow] - scalars = [-1, 1, 2] - idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2), - RangeIndex(-10, 10, 2), RangeIndex(5, -5, -1)] - for op in ops: - for a, b in combinations(idxs, 2): - result = op(a, b) - expected = op(Int64Index(a), Int64Index(b)) - tm.assert_index_equal(result, expected) - for idx in idxs: - for scalar in scalars: - result = op(idx, scalar) - expected = op(Int64Index(idx), scalar) - tm.assert_index_equal(result, expected) - - def test_too_many_names(self): - def testit(): - self.index.names = ["roger", "harold"] - - assertRaisesRegexp(ValueError, "^Length", testit) - - def test_constructor(self): - index = RangeIndex(5) - expected = np.arange(5, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._start, 0) - self.assertEqual(index._stop, 5) - self.assertEqual(index._step, 1) - self.assertEqual(index.name, None) - tm.assert_index_equal(Index(expected), index) - - index = RangeIndex(1, 5) - expected = np.arange(1, 5, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._start, 1) - tm.assert_index_equal(Index(expected), index) - - index = RangeIndex(1, 5, 2) - expected = np.arange(1, 5, 2, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index._step, 2) - tm.assert_index_equal(Index(expected), index) - - index = RangeIndex() - expected = np.empty(0, dtype=np.int64) - self.assertIsInstance(index, RangeIndex) - 
self.assertEqual(index._start, 0) - self.assertEqual(index._stop, 0) - self.assertEqual(index._step, 1) - tm.assert_index_equal(Index(expected), index) - - index = RangeIndex(name='Foo') - self.assertIsInstance(index, RangeIndex) - self.assertEqual(index.name, 'Foo') - - # we don't allow on a bare Index - self.assertRaises(TypeError, lambda: Index(0, 1000)) - - # invalid args - for i in [Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']), - [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10)]: - self.assertRaises(TypeError, lambda: RangeIndex(i)) - - def test_constructor_same(self): - - # pass thru w and w/o copy - index = RangeIndex(1, 5, 2) - result = RangeIndex(index, copy=False) - self.assertTrue(result.identical(index)) - - result = RangeIndex(index, copy=True) - self.assertTrue(result.equals(index)) - - result = RangeIndex(index) - self.assertTrue(result.equals(index)) - - self.assertRaises(TypeError, - lambda: RangeIndex(index, dtype='float64')) - - def test_constructor_range(self): - - self.assertRaises(TypeError, lambda: RangeIndex(range(1, 5, 2))) - - result = RangeIndex.from_range(range(1, 5, 2)) - expected = RangeIndex(1, 5, 2) - self.assertTrue(result.equals(expected)) - - result = RangeIndex.from_range(range(5, 6)) - expected = RangeIndex(5, 6, 1) - self.assertTrue(result.equals(expected)) - - # an invalid range - result = RangeIndex.from_range(range(5, 1)) - expected = RangeIndex(0, 0, 1) - self.assertTrue(result.equals(expected)) - - result = RangeIndex.from_range(range(5)) - expected = RangeIndex(0, 5, 1) - self.assertTrue(result.equals(expected)) - - result = Index(range(1, 5, 2)) - expected = RangeIndex(1, 5, 2) - self.assertTrue(result.equals(expected)) - - self.assertRaises(TypeError, - lambda: Index(range(1, 5, 2), dtype='float64')) - - def test_numeric_compat2(self): - # validate that we are handling the RangeIndex overrides to numeric ops - # and returning RangeIndex where possible - - idx = RangeIndex(0, 10, 2) - - result = idx 
* 2 - expected = RangeIndex(0, 20, 4) - self.assertTrue(result.equals(expected)) - - result = idx + 2 - expected = RangeIndex(2, 12, 2) - self.assertTrue(result.equals(expected)) - - result = idx - 2 - expected = RangeIndex(-2, 8, 2) - self.assertTrue(result.equals(expected)) - - # truediv under PY3 - result = idx / 2 - if PY3: - expected = RangeIndex(0, 5, 1) - else: - expected = RangeIndex(0, 5, 1).astype('float64') - self.assertTrue(result.equals(expected)) - - result = idx / 4 - expected = RangeIndex(0, 10, 2).values / 4 - self.assertTrue(result.equals(expected)) - - result = idx // 1 - expected = idx - tm.assert_index_equal(result, expected, exact=True) - - # __mul__ - result = idx * idx - expected = Index(idx.values * idx.values) - tm.assert_index_equal(result, expected, exact=True) - - # __pow__ - idx = RangeIndex(0, 1000, 2) - result = idx ** 2 - expected = idx._int64index ** 2 - tm.assert_index_equal(Index(result.values), expected, exact=True) - - # __floordiv__ - cases_exact = [(RangeIndex(0, 1000, 2), 2, RangeIndex(0, 500, 1)), - (RangeIndex(-99, -201, -3), -3, RangeIndex(33, 67, 1)), - (RangeIndex(0, 1000, 1), 2, - RangeIndex(0, 1000, 1)._int64index // 2), - (RangeIndex(0, 100, 1), 2.0, - RangeIndex(0, 100, 1)._int64index // 2.0), - (RangeIndex(), 50, RangeIndex()), - (RangeIndex(2, 4, 2), 3, RangeIndex(0, 1, 1)), - (RangeIndex(-5, -10, -6), 4, RangeIndex(-2, -1, 1)), - (RangeIndex(-100, -200, 3), 2, RangeIndex())] - for idx, div, expected in cases_exact: - tm.assert_index_equal(idx // div, expected, exact=True) - - def test_constructor_corner(self): - arr = np.array([1, 2, 3, 4], dtype=object) - index = RangeIndex(1, 5) - self.assertEqual(index.values.dtype, np.int64) - self.assertTrue(index.equals(arr)) - - # non-int raise Exception - self.assertRaises(TypeError, RangeIndex, '1', '10', '1') - self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) - - # invalid passed type - self.assertRaises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) - - 
def test_copy(self): - i = RangeIndex(5, name='Foo') - i_copy = i.copy() - self.assertTrue(i_copy is not i) - self.assertTrue(i_copy.identical(i)) - self.assertEqual(i_copy._start, 0) - self.assertEqual(i_copy._stop, 5) - self.assertEqual(i_copy._step, 1) - self.assertEqual(i_copy.name, 'Foo') - - def test_repr(self): - i = RangeIndex(5, name='Foo') - result = repr(i) - if PY3: - expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" - else: - expected = "RangeIndex(start=0, stop=5, step=1, name=u'Foo')" - self.assertTrue(result, expected) - - result = eval(result) - self.assertTrue(result.equals(i)) - - i = RangeIndex(5, 0, -1) - result = repr(i) - expected = "RangeIndex(start=5, stop=0, step=-1)" - self.assertEqual(result, expected) - - result = eval(result) - self.assertTrue(result.equals(i)) - - def test_insert(self): - - idx = RangeIndex(5, name='Foo') - result = idx[1:4] - - # test 0th element - self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) - - def test_delete(self): - - idx = RangeIndex(5, name='Foo') - expected = idx[1:].astype(int) - result = idx.delete(0) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - - expected = idx[:-1].astype(int) - result = idx.delete(-1) - self.assertTrue(result.equals(expected)) - self.assertEqual(result.name, expected.name) - - with tm.assertRaises((IndexError, ValueError)): - # either depending on numpy version - result = idx.delete(len(idx)) - - def test_view(self): - super(TestRangeIndex, self).test_view() - - i = RangeIndex(name='Foo') - i_view = i.view() - self.assertEqual(i_view.name, 'Foo') - - i_view = i.view('i8') - tm.assert_numpy_array_equal(i, i_view) - - i_view = i.view(RangeIndex) - tm.assert_index_equal(i, i_view) - - def test_dtype(self): - self.assertEqual(self.index.dtype, np.int64) - - def test_is_monotonic(self): - self.assertTrue(self.index.is_monotonic) - self.assertTrue(self.index.is_monotonic_increasing) - 
self.assertFalse(self.index.is_monotonic_decreasing) - - index = RangeIndex(4, 0, -1) - self.assertFalse(index.is_monotonic) - self.assertTrue(index.is_monotonic_decreasing) - - index = RangeIndex(1, 2) - self.assertTrue(index.is_monotonic) - self.assertTrue(index.is_monotonic_increasing) - self.assertTrue(index.is_monotonic_decreasing) - - def test_equals(self): - equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), - (RangeIndex(0), RangeIndex(1, -1, 3)), - (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), - (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))] - for left, right in equiv_pairs: - self.assertTrue(left.equals(right)) - self.assertTrue(right.equals(left)) - - def test_logical_compat(self): - idx = self.create_index() - self.assertEqual(idx.all(), idx.values.all()) - self.assertEqual(idx.any(), idx.values.any()) - - def test_identical(self): - i = Index(self.index.copy()) - self.assertTrue(i.identical(self.index)) - - # we don't allow object dtype for RangeIndex - if isinstance(self.index, RangeIndex): - return - - same_values_different_type = Index(i, dtype=object) - self.assertFalse(i.identical(same_values_different_type)) - - i = self.index.copy(dtype=object) - i = i.rename('foo') - same_values = Index(i, dtype=object) - self.assertTrue(same_values.identical(self.index.copy(dtype=object))) - - self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', dtype=object).identical( - i)) - - self.assertFalse(self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) - - def test_get_indexer(self): - target = RangeIndex(10) - indexer = self.index.get_indexer(target) - expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) - self.assert_numpy_array_equal(indexer, expected) - - def test_get_indexer_pad(self): - target = RangeIndex(10) - indexer = self.index.get_indexer(target, method='pad') - expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) - self.assert_numpy_array_equal(indexer, expected) - - def 
test_get_indexer_backfill(self): - target = RangeIndex(10) - indexer = self.index.get_indexer(target, method='backfill') - expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) - self.assert_numpy_array_equal(indexer, expected) - - def test_join_outer(self): - # join with Int64Index - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = self.index.join(other, how='outer', - return_indexers=True) - noidx_res = self.index.join(other, how='outer') - self.assertTrue(res.equals(noidx_res)) - - eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25]) - elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, - -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) - eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, - 5, 4, 3, 2, 1, 0], dtype=np.int64) - - self.assertIsInstance(res, Int64Index) - self.assertFalse(isinstance(res, RangeIndex)) - self.assertTrue(res.equals(eres)) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) - - # join with RangeIndex - other = RangeIndex(25, 14, -1) - - res, lidx, ridx = self.index.join(other, how='outer', - return_indexers=True) - noidx_res = self.index.join(other, how='outer') - self.assertTrue(res.equals(noidx_res)) - - self.assertIsInstance(res, Int64Index) - self.assertFalse(isinstance(res, RangeIndex)) - self.assertTrue(res.equals(eres)) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) - - def test_join_inner(self): - # Join with non-RangeIndex - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = self.index.join(other, how='inner', - return_indexers=True) - - # no guarantee of sortedness, so sort for comparison purposes - ind = res.argsort() - res = res.take(ind) - lidx = lidx.take(ind) - ridx = ridx.take(ind) - - eres = Int64Index([16, 18]) - elidx = np.array([8, 9]) - eridx = np.array([9, 7]) - - self.assertIsInstance(res, Int64Index) - self.assertTrue(res.equals(eres)) - 
self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) - - # Join two RangeIndex - other = RangeIndex(25, 14, -1) - - res, lidx, ridx = self.index.join(other, how='inner', - return_indexers=True) - - self.assertIsInstance(res, RangeIndex) - self.assertTrue(res.equals(eres)) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) - - def test_join_left(self): - # Join with Int64Index - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = self.index.join(other, how='left', - return_indexers=True) - eres = self.index - eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], - dtype=np.int64) - - self.assertIsInstance(res, RangeIndex) - self.assertTrue(res.equals(eres)) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, eridx) - - # Join withRangeIndex - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = self.index.join(other, how='left', - return_indexers=True) - - self.assertIsInstance(res, RangeIndex) - self.assertTrue(res.equals(eres)) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, eridx) - - def test_join_right(self): - # Join with Int64Index - other = Int64Index(np.arange(25, 14, -1)) - - res, lidx, ridx = self.index.join(other, how='right', - return_indexers=True) - eres = other - elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], - dtype=np.int64) - - self.assertIsInstance(other, Int64Index) - self.assertTrue(res.equals(eres)) - self.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) - - # Join withRangeIndex - other = RangeIndex(25, 14, -1) - - res, lidx, ridx = self.index.join(other, how='right', - return_indexers=True) - eres = other - - self.assertIsInstance(other, RangeIndex) - self.assertTrue(res.equals(eres)) - self.assert_numpy_array_equal(lidx, elidx) - self.assertIsNone(ridx) - - def test_join_non_int_index(self): - other = Index([3, 6, 7, 8, 10], dtype=object) - - outer = self.index.join(other, how='outer') 
- outer2 = other.join(self.index, how='outer') - expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, - 16, 18], dtype=object) - self.assertTrue(outer.equals(outer2)) - self.assertTrue(outer.equals(expected)) - - inner = self.index.join(other, how='inner') - inner2 = other.join(self.index, how='inner') - expected = Index([6, 8, 10], dtype=object) - self.assertTrue(inner.equals(inner2)) - self.assertTrue(inner.equals(expected)) - - left = self.index.join(other, how='left') - self.assertTrue(left.equals(self.index)) - - left2 = other.join(self.index, how='left') - self.assertTrue(left2.equals(other)) - - right = self.index.join(other, how='right') - self.assertTrue(right.equals(other)) - - right2 = other.join(self.index, how='right') - self.assertTrue(right2.equals(self.index)) - - def test_join_non_unique(self): - other = Index([4, 4, 3, 3]) - - res, lidx, ridx = self.index.join(other, return_indexers=True) - - eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) - elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) - eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], - dtype=np.int64) - - self.assertTrue(res.equals(eres)) - self.assert_numpy_array_equal(lidx, elidx) - self.assert_numpy_array_equal(ridx, eridx) - - def test_join_self(self): - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - joined = self.index.join(self.index, how=kind) - self.assertIs(self.index, joined) - - def test_intersection(self): - # intersect with Int64Index - other = Index(np.arange(1, 6)) - result = self.index.intersection(other) - expected = np.sort(np.intersect1d(self.index.values, other.values)) - self.assert_numpy_array_equal(result, expected) - - result = other.intersection(self.index) - expected = np.sort(np.asarray(np.intersect1d(self.index.values, - other.values))) - self.assert_numpy_array_equal(result, expected) - - # intersect with increasing RangeIndex - other = RangeIndex(1, 6) - result = self.index.intersection(other) - expected = 
np.sort(np.intersect1d(self.index.values, other.values)) - self.assert_numpy_array_equal(result, expected) - - # intersect with decreasing RangeIndex - other = RangeIndex(5, 0, -1) - result = self.index.intersection(other) - expected = np.sort(np.intersect1d(self.index.values, other.values)) - self.assert_numpy_array_equal(result, expected) - - def test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - i1 = Index(dt_dates, dtype=object) - i2 = Index(['aa'], dtype=object) - res = i2.intersection(i1) - - self.assertEqual(len(res), 0) - - def test_union_noncomparable(self): - from datetime import datetime, timedelta - # corner case, non-Int64Index - now = datetime.now() - other = Index([now + timedelta(i) for i in range(4)], dtype=object) - result = self.index.union(other) - expected = np.concatenate((self.index, other)) - self.assert_numpy_array_equal(result, expected) - - result = other.union(self.index) - expected = np.concatenate((other, self.index)) - self.assert_numpy_array_equal(result, expected) - - def test_union(self): - RI = RangeIndex - I64 = Int64Index - cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)), - (RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)), - (RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)), - (RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)), - (RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)), - (RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)), - (RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)), - (RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)), - (RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)), - (RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)), - (RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)), - (RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)), - (RI(), RI(), RI()), - (RI(0, -10, -2), RI(), RI(0, -10, -2)), - (RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)), - (RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)), - (RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)), - (RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)), - (RI(0, 
10, 5), RI(-5, -6, -20), RI(-5, 10, 5)), - (RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])), - (RI(0, 10, 1), I64([]), RI(0, 10, 1)), - (RI(), I64([1, 5, 6]), I64([1, 5, 6]))] - for idx1, idx2, expected in cases: - res1 = idx1.union(idx2) - res2 = idx2.union(idx1) - res3 = idx1._int64index.union(idx2) - tm.assert_index_equal(res1, expected, exact=True) - tm.assert_index_equal(res2, expected, exact=True) - tm.assert_index_equal(res3, expected) - - def test_nbytes(self): - - # memory savings vs int index - i = RangeIndex(0, 1000) - self.assertTrue(i.nbytes < i.astype(int).nbytes / 10) - - # constant memory usage - i2 = RangeIndex(0, 10) - self.assertEqual(i.nbytes, i2.nbytes) - - def test_cant_or_shouldnt_cast(self): - # can't - self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') - - # shouldn't - self.assertRaises(TypeError, RangeIndex, '0', '1', '2') - - def test_view_Index(self): - self.index.view(Index) - - def test_prevent_casting(self): - result = self.index.astype('O') - self.assertEqual(result.dtype, np.object_) - - def test_take_preserve_name(self): - index = RangeIndex(1, 5, name='foo') - taken = index.take([3, 0, 1]) - self.assertEqual(index.name, taken.name) - - def test_print_unicode_columns(self): - df = pd.DataFrame({u("\u05d0"): [1, 2, 3], - "\u05d1": [4, 5, 6], - "c": [7, 8, 9]}) - repr(df.columns) # should not raise UnicodeDecodeError - - def test_repr_roundtrip(self): - tm.assert_index_equal(eval(repr(self.index)), self.index) - - def test_slice_keep_name(self): - idx = RangeIndex(1, 2, name='asdf') - self.assertEqual(idx.name, idx[1:].name) - - def test_explicit_conversions(self): - - # GH 8608 - # add/sub are overriden explicity for Float/Int Index - idx = RangeIndex(5) - - # float conversions - arr = np.arange(5, dtype='int64') * 3.2 - expected = Float64Index(arr) - fidx = idx * 3.2 - tm.assert_index_equal(fidx, expected) - fidx = 3.2 * idx - tm.assert_index_equal(fidx, expected) - - # interops with numpy arrays - expected = 
Float64Index(arr) - a = np.zeros(5, dtype='float64') - result = fidx - a - tm.assert_index_equal(result, expected) - - expected = Float64Index(-arr) - a = np.zeros(5, dtype='float64') - result = a - fidx - tm.assert_index_equal(result, expected) - - def test_duplicates(self): - for ind in self.indices: - if not len(ind): - continue - idx = self.indices[ind] - self.assertTrue(idx.is_unique) - self.assertFalse(idx.has_duplicates) - - def test_ufunc_compat(self): - idx = RangeIndex(5) - result = np.sin(idx) - expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) - tm.assert_index_equal(result, expected) - - def test_extended_gcd(self): - result = self.index._extended_gcd(6, 10) - self.assertEqual(result[0], result[1] * 6 + result[2] * 10) - self.assertEqual(2, result[0]) - - result = self.index._extended_gcd(10, 6) - self.assertEqual(2, result[1] * 10 + result[2] * 6) - self.assertEqual(2, result[0]) - - def test_min_fitting_element(self): - result = RangeIndex(0, 20, 2)._min_fitting_element(1) - self.assertEqual(2, result) - - result = RangeIndex(1, 6)._min_fitting_element(1) - self.assertEqual(1, result) - - result = RangeIndex(18, -2, -2)._min_fitting_element(1) - self.assertEqual(2, result) - - result = RangeIndex(5, 0, -1)._min_fitting_element(1) - self.assertEqual(1, result) - - big_num = 500000000000000000000000 - - result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) - self.assertEqual(big_num, result) - - def test_max_fitting_element(self): - result = RangeIndex(0, 20, 2)._max_fitting_element(17) - self.assertEqual(16, result) - - result = RangeIndex(1, 6)._max_fitting_element(4) - self.assertEqual(4, result) - - result = RangeIndex(18, -2, -2)._max_fitting_element(17) - self.assertEqual(16, result) - - result = RangeIndex(5, 0, -1)._max_fitting_element(4) - self.assertEqual(4, result) - - big_num = 500000000000000000000000 - - result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num) - self.assertEqual(big_num, result) - - 
def test_pickle_compat_construction(self): - # RangeIndex() is a valid constructor - pass - - def test_slice_specialised(self): - - # scalar indexing - res = self.index[1] - expected = 2 - self.assertEqual(res, expected) - - res = self.index[-1] - expected = 18 - self.assertEqual(res, expected) - - # slicing - # slice value completion - index = self.index[:] - expected = self.index - self.assert_numpy_array_equal(index, expected) - - # positive slice values - index = self.index[7:10:2] - expected = np.array([14, 18]) - self.assert_numpy_array_equal(index, expected) - - # negative slice values - index = self.index[-1:-5:-2] - expected = np.array([18, 14]) - self.assert_numpy_array_equal(index, expected) - - # stop overshoot - index = self.index[2:100:4] - expected = np.array([4, 12]) - self.assert_numpy_array_equal(index, expected) - - # reverse - index = self.index[::-1] - expected = self.index.values[::-1] - self.assert_numpy_array_equal(index, expected) - - index = self.index[-8::-1] - expected = np.array([4, 2, 0]) - self.assert_numpy_array_equal(index, expected) - - index = self.index[-40::-1] - expected = np.array([]) - self.assert_numpy_array_equal(index, expected) - - index = self.index[40::-1] - expected = self.index.values[40::-1] - self.assert_numpy_array_equal(index, expected) - - index = self.index[10::-1] - expected = self.index.values[::-1] - self.assert_numpy_array_equal(index, expected) - - def test_len_specialised(self): - - # make sure that our len is the same as - # np.arange calc - - for step in np.arange(1, 6, 1): - - arr = np.arange(0, 5, step) - i = RangeIndex(0, 5, step) - self.assertEqual(len(i), len(arr)) - - i = RangeIndex(5, 0, step) - self.assertEqual(len(i), 0) - - for step in np.arange(-6, -1, 1): - - arr = np.arange(5, 0, step) - i = RangeIndex(5, 0, step) - self.assertEqual(len(i), len(arr)) - - i = RangeIndex(0, 5, step) - self.assertEqual(len(i), 0) - - -class DatetimeLike(Base): - - def test_shift_identity(self): - - idx = 
self.create_index() - self.assert_index_equal(idx, idx.shift(0)) - - def test_str(self): - - # test the string repr - idx = self.create_index() - idx.name = 'foo' - self.assertFalse("length=%s" % len(idx) in str(idx)) - self.assertTrue("'foo'" in str(idx)) - self.assertTrue(idx.__class__.__name__ in str(idx)) - - if hasattr(idx, 'tz'): - if idx.tz is not None: - self.assertTrue(idx.tz in str(idx)) - if hasattr(idx, 'freq'): - self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) - - def test_view(self): - super(DatetimeLike, self).test_view() - - i = self.create_index() - - i_view = i.view('i8') - result = self._holder(i) - tm.assert_index_equal(result, i) - - i_view = i.view(self._holder) - result = self._holder(i) - tm.assert_index_equal(result, i_view) - - -class TestDatetimeIndex(DatetimeLike, tm.TestCase): - _holder = DatetimeIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makeDateIndex(10)) - self.setup_indices() - - def create_index(self): - return date_range('20130101', periods=5) - - def test_shift(self): - - # test shift for datetimeIndex and non datetimeIndex - # GH8083 - - drange = self.create_index() - result = drange.shift(1) - expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', - '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(-1) - expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', - '2013-01-03', '2013-01-04'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D') - expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', - '2013-01-10', - '2013-01-11'], freq='D') - self.assert_index_equal(result, expected) - - def test_construction_with_alt(self): - - i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') - i2 = DatetimeIndex(i, dtype=i.dtype) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz=i.dtype.tz) - 
self.assert_index_equal(i, i2) - - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) - self.assert_index_equal(i, i2) - - i2 = DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) - self.assert_index_equal(i, i2) - - # localize into the provided tz - i2 = DatetimeIndex(i.tz_localize(None).asi8, tz='UTC') - expected = i.tz_localize(None).tz_localize('UTC') - self.assert_index_equal(i2, expected) - - i2 = DatetimeIndex(i, tz='UTC') - expected = i.tz_convert('UTC') - self.assert_index_equal(i2, expected) - - # incompat tz/dtype - self.assertRaises(ValueError, lambda: DatetimeIndex( - i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) - - def test_pickle_compat_construction(self): - pass - - def test_construction_index_with_mixed_timezones(self): - # GH 11488 - # no tz results in DatetimeIndex - result = Index( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # same tz results in DatetimeIndex - result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') - ], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # same tz results in DatetimeIndex (DST) - result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, 
DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # different tz results in Index(dtype=object) - result = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - exp = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - # passing tz results in DatetimeIndex - result = Index([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), - Timestamp('2011-01-03 00:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # length = 1 - result = Index([Timestamp('2011-01-01')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # length = 1 with tz - result = Index( - [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', - name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - def test_construction_index_with_mixed_timezones_with_NaT(self): - # GH 11488 - result = 
Index([pd.NaT, Timestamp('2011-01-01'), - pd.NaT, Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'), - pd.NaT, Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # same tz results in DatetimeIndex - result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # same tz results in DatetimeIndex (DST) - result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - pd.NaT, - Timestamp('2011-08-01 10:00', tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - # different tz results in Index(dtype=object) - result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], - name='idx') - exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 
10:00', tz='US/Eastern')], - dtype='object', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertFalse(isinstance(result, DatetimeIndex)) - - # passing tz results in DatetimeIndex - result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', - tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'), - pd.NaT, Timestamp('2011-01-03 00:00')], - tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # all NaT - result = Index([pd.NaT, pd.NaT], name='idx') - exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNone(result.tz) - - # all NaT with tz - result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - self.assertIsNotNone(result.tz) - self.assertEqual(result.tz, exp.tz) - - def test_construction_dti_with_mixed_timezones(self): - # GH 11488 (not changed, added explicit tests) - - # no tz results in DatetimeIndex - result = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # same tz results in DatetimeIndex - result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', - tz='Asia/Tokyo')], - name='idx') - exp = DatetimeIndex( - [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') - ], tz='Asia/Tokyo', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, 
DatetimeIndex)) - - # same tz results in DatetimeIndex (DST) - result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', - tz='US/Eastern')], - name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-08-01 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # different tz coerces tz-naive to tz-awareIndex(dtype=object) - result = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', - tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), - Timestamp('2011-01-02 10:00')], - tz='US/Eastern', name='idx') - self.assert_index_equal(result, exp, exact=True) - self.assertTrue(isinstance(result, DatetimeIndex)) - - # tz mismatch affecting to tz-aware raises TypeError/ValueError - with tm.assertRaises(ValueError): - DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - name='idx') - - with tm.assertRaises(TypeError): - DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - - with tm.assertRaises(ValueError): - DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='US/Eastern', name='idx') - - def test_get_loc(self): - idx = pd.date_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - if method is not None: - self.assertEqual(idx.get_loc(idx[1], method, - tolerance=pd.Timedelta('0 days')), - 1) - - self.assertEqual(idx.get_loc('2000-01-01', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest'), 1) - - 
self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-01T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') - - self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) - - self.assertEqual(idx.get_loc('1999', method='nearest'), 0) - self.assertEqual(idx.get_loc('2001', method='nearest'), 2) - - with tm.assertRaises(KeyError): - idx.get_loc('1999', method='pad') - with tm.assertRaises(KeyError): - idx.get_loc('2001', method='backfill') - - with tm.assertRaises(KeyError): - idx.get_loc('foobar') - with tm.assertRaises(TypeError): - idx.get_loc(slice(2)) - - idx = pd.to_datetime(['2000-01-01', '2000-01-04']) - self.assertEqual(idx.get_loc('2000-01-02', method='nearest'), 0) - self.assertEqual(idx.get_loc('2000-01-03', method='nearest'), 1) - self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 2)) - - # time indexing - idx = pd.date_range('2000-01-01', periods=24, freq='H') - tm.assert_numpy_array_equal(idx.get_loc(time(12)), [12]) - tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), []) - with tm.assertRaises(NotImplementedError): - idx.get_loc(time(12, 30), method='pad') - - def test_get_indexer(self): - idx = pd.date_range('2000-01-01', periods=3) - tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) - - target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', - '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), 
[-1, 0, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest'), [0, 1, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')), - [0, -1, 1]) - with tm.assertRaises(ValueError): - idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') - - def test_roundtrip_pickle_with_tz(self): - - # GH 8367 - # round-trip of timezone - index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') - unpickled = self.round_trip_pickle(index) - self.assertTrue(index.equals(unpickled)) - - def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): - # GH7774 - index = date_range('20130101', periods=3, tz='US/Eastern') - self.assertEqual(str(index.reindex([])[0].tz), 'US/Eastern') - self.assertEqual(str(index.reindex(np.array([]))[0].tz), 'US/Eastern') - - def test_time_loc(self): # GH8667 - from datetime import time - from pandas.index import _SIZE_CUTOFF - - ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) - key = time(15, 11, 30) - start = key.hour * 3600 + key.minute * 60 + key.second - step = 24 * 3600 - - for n in ns: - idx = pd.date_range('2014-11-26', periods=n, freq='S') - ts = pd.Series(np.random.randn(n), index=idx) - i = np.arange(start, n, step) - - tm.assert_numpy_array_equal(ts.index.get_loc(key), i) - tm.assert_series_equal(ts[key], ts.iloc[i]) - - left, right = ts.copy(), ts.copy() - left[key] *= -10 - right.iloc[i] *= -10 - tm.assert_series_equal(left, right) - - def test_time_overflow_for_32bit_machines(self): - # GH8943. On some machines NumPy defaults to np.int32 (for example, - # 32-bit Linux machines). In the function _generate_regular_range - # found in tseries/index.py, `periods` gets multiplied by `strides` - # (which has value 1e9) and since the max value for np.int32 is ~2e9, - # and since those machines won't promote np.int32 to np.int64, we get - # overflow. 
- periods = np.int_(1000) - - idx1 = pd.date_range(start='2000', periods=periods, freq='S') - self.assertEqual(len(idx1), periods) - - idx2 = pd.date_range(end='2000', periods=periods, freq='S') - self.assertEqual(len(idx2), periods) - - def test_intersection(self): - first = self.index - second = self.index[5:] - intersect = first.intersection(second) - self.assertTrue(tm.equalContents(intersect, second)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.intersection(case) - self.assertTrue(tm.equalContents(result, second)) - - third = Index(['a', 'b', 'c']) - result = first.intersection(third) - expected = pd.Index([], dtype=object) - self.assert_index_equal(result, expected) - - def test_union(self): - first = self.index[:5] - second = self.index[5:] - everything = self.index - union = first.union(second) - self.assertTrue(tm.equalContents(union, everything)) - - # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] - for case in cases: - result = first.union(case) - self.assertTrue(tm.equalContents(result, everything)) - - def test_nat(self): - self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) - - def test_ufunc_coercions(self): - idx = date_range('2011-01-01', periods=3, freq='2D', name='x') - - delta = np.timedelta64(1, 'D') - for result in [idx + delta, np.add(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2011-01-02', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = date_range('2010-12-31', periods=3, freq='2D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '2D') - - delta = np.array([np.timedelta64(1, 'D'), np.timedelta64(2, 'D'), - np.timedelta64(3, 'D')]) - for result in [idx + delta, np.add(idx, delta)]: - 
tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-08'], - freq='3D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '3D') - - for result in [idx - delta, np.subtract(idx, delta)]: - tm.assertIsInstance(result, DatetimeIndex) - exp = DatetimeIndex(['2010-12-31', '2011-01-01', '2011-01-02'], - freq='D', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'D') - - def test_fillna_datetime64(self): - # GH 11343 - for tz in ['US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, - '2011-01-01 11:00']) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', - '2011-01-01 11:00']) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # tz mismatch - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), - pd.Timestamp('2011-01-01 10:00', tz=tz), - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', - pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - idx = pd.DatetimeIndex( - ['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz) - - exp = pd.DatetimeIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' - ], tz=tz) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - pd.Timestamp('2011-01-01 10:00'), - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal( - idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) - - # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), - 'x', - pd.Timestamp('2011-01-01 11:00', tz=tz)], - dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - -class TestPeriodIndex(DatetimeLike, tm.TestCase): - _holder = 
PeriodIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makePeriodIndex(10)) - self.setup_indices() - - def create_index(self): - return period_range('20130101', periods=5, freq='D') - - def test_shift(self): - - # test shift for PeriodIndex - # GH8083 - drange = self.create_index() - result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', - '2013-01-05', '2013-01-06'], freq='D') - self.assert_index_equal(result, expected) - - def test_pickle_compat_construction(self): - pass - - def test_get_loc(self): - idx = pd.period_range('2000-01-01', periods=3) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual( - idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual( - idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - idx = pd.period_range('2000-01-01', periods=5)[::2] - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance='1 day'), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=pd.Timedelta('1D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=np.timedelta64(1, 'D')), 1) - self.assertEqual(idx.get_loc('2000-01-02T12', method='nearest', - tolerance=timedelta(1)), 1) - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc('2000-01-10', method='nearest', tolerance='foo') - - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' - with tm.assertRaisesRegexp(ValueError, msg): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with tm.assertRaises(KeyError): - idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') - - def test_get_indexer(self): - idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start') - 
tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) - - target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', - '2000-01-02T01'], freq='H') - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest'), [0, 1, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', tolerance='1 hour'), - [0, -1, 1]) - - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' - with self.assertRaisesRegexp(ValueError, msg): - idx.get_indexer(target, 'nearest', tolerance='1 minute') - - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', tolerance='1 day'), [0, 1, 1]) - - def test_repeat(self): - # GH10183 - idx = pd.period_range('2000-01-01', periods=3, freq='D') - res = idx.repeat(3) - exp = PeriodIndex(idx.values.repeat(3), freq='D') - self.assert_index_equal(res, exp) - self.assertEqual(res.freqstr, 'D') - - def test_period_index_indexer(self): - - # GH4125 - idx = pd.period_range('2002-01', '2003-12', freq='M') - df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) - self.assert_frame_equal(df, df.ix[idx]) - self.assert_frame_equal(df, df.ix[list(idx)]) - self.assert_frame_equal(df, df.loc[list(idx)]) - self.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) - self.assert_frame_equal(df, df.loc[list(idx)]) - - def test_fillna_period(self): - # GH 11343 - idx = pd.PeriodIndex( - ['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H') - - exp = pd.PeriodIndex( - ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' - ], freq='H') - self.assert_index_equal( - idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) - - exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', - pd.Period('2011-01-01 11:00', freq='H')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - with tm.assertRaisesRegexp( - ValueError, - 'Input has different freq=D 
from PeriodIndex\\(freq=H\\)'): - idx.fillna(pd.Period('2011-01-01', freq='D')) - - def test_no_millisecond_field(self): - with self.assertRaises(AttributeError): - DatetimeIndex.millisecond - - with self.assertRaises(AttributeError): - DatetimeIndex([]).millisecond - - -class TestTimedeltaIndex(DatetimeLike, tm.TestCase): - _holder = TimedeltaIndex - _multiprocess_can_split_ = True - - def setUp(self): - self.indices = dict(index=tm.makeTimedeltaIndex(10)) - self.setup_indices() - - def create_index(self): - return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) - - def test_shift(self): - # test shift for TimedeltaIndex - # err8083 - - drange = self.create_index() - result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', - '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'], - freq='D') - self.assert_index_equal(result, expected) - - result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', - '8 days 01:00:03', '9 days 01:00:03', - '10 days 01:00:03'], freq='D') - self.assert_index_equal(result, expected) - - def test_get_loc(self): - idx = pd.to_timedelta(['0 days', '1 days', '2 days']) - - for method in [None, 'pad', 'backfill', 'nearest']: - self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) - self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) - self.assertEqual( - idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) - - with tm.assertRaisesRegexp(ValueError, 'must be convertible'): - idx.get_loc(idx[1], method='nearest', tolerance='foo') - - for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]: - self.assertEqual(idx.get_loc('1 day 1 hour', method), loc) - - def test_get_indexer(self): - idx = pd.to_timedelta(['0 
days', '1 days', '2 days']) - tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) - - target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest'), [0, 1, 1]) - tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', - tolerance=pd.Timedelta('1 hour')), - [0, -1, 1]) - - def test_numeric_compat(self): - - idx = self._holder(np.arange(5, dtype='int64')) - didx = self._holder(np.arange(5, dtype='int64') ** 2) - result = idx * 1 - tm.assert_index_equal(result, idx) - - result = 1 * idx - tm.assert_index_equal(result, idx) - - result = idx / 1 - tm.assert_index_equal(result, idx) - - result = idx // 1 - tm.assert_index_equal(result, idx) - - result = idx * np.array(5, dtype='int64') - tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='int64') * 5)) - - result = idx * np.arange(5, dtype='int64') - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='int64')) - tm.assert_index_equal(result, didx) - - result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, self._holder(np.arange( - 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) - - # invalid - self.assertRaises(TypeError, lambda: idx * idx) - self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) - - def test_pickle_compat_construction(self): - pass - - def test_ufunc_coercions(self): - # normal ops are also tested in tseries/test_timedeltas.py - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - - for result in [idx * 2, np.multiply(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['4H', '8H', '12H', '16H', '20H'], - freq='4H', name='x') - 
tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '4H') - - for result in [idx / 2, np.divide(idx, 2)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['1H', '2H', '3H', '4H', '5H'], - freq='H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, 'H') - - idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') - for result in [-idx, np.negative(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], - freq='-2H', name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, '-2H') - - idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], - freq='H', name='x') - for result in [abs(idx), np.absolute(idx)]: - tm.assertIsInstance(result, TimedeltaIndex) - exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], - freq=None, name='x') - tm.assert_index_equal(result, exp) - self.assertEqual(result.freq, None) - - def test_fillna_timedelta(self): - # GH 11343 - idx = pd.TimedeltaIndex(['1 day', pd.NaT, '3 day']) - - exp = pd.TimedeltaIndex(['1 day', '2 day', '3 day']) - self.assert_index_equal(idx.fillna(pd.Timedelta('2 day')), exp) - - exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) - idx.fillna(pd.Timedelta('3 hour')) - - exp = pd.Index( - [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) - self.assert_index_equal(idx.fillna('x'), exp) - - -class TestMultiIndex(Base, tm.TestCase): - _holder = MultiIndex - _multiprocess_can_split_ = True - _compat_props = ['shape', 'ndim', 'size', 'itemsize'] - - def setUp(self): - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - self.index_names = ['first', 'second'] - self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels - ], names=self.index_names, - 
verify_integrity=False)) - self.setup_indices() - - def create_index(self): - return self.index - - def test_boolean_context_compat2(self): - - # boolean context compat - # GH7897 - i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)]) - common = i1.intersection(i2) - - def f(): - if common: - pass - - tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) - - def test_labels_dtypes(self): - - # GH 8456 - i = MultiIndex.from_tuples([('A', 1), ('A', 2)]) - self.assertTrue(i.labels[0].dtype == 'int8') - self.assertTrue(i.labels[1].dtype == 'int8') - - i = MultiIndex.from_product([['a'], range(40)]) - self.assertTrue(i.labels[1].dtype == 'int8') - i = MultiIndex.from_product([['a'], range(400)]) - self.assertTrue(i.labels[1].dtype == 'int16') - i = MultiIndex.from_product([['a'], range(40000)]) - self.assertTrue(i.labels[1].dtype == 'int32') - - i = pd.MultiIndex.from_product([['a'], range(1000)]) - self.assertTrue((i.labels[0] >= 0).all()) - self.assertTrue((i.labels[1] >= 0).all()) - - def test_set_name_methods(self): - # so long as these are synonyms, we don't need to test set_names - self.assertEqual(self.index.rename, self.index.set_names) - new_names = [name + "SUFFIX" for name in self.index_names] - ind = self.index.set_names(new_names) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, new_names) - with assertRaisesRegexp(ValueError, "^Length"): - ind.set_names(new_names + new_names) - new_names2 = [name + "SUFFIX2" for name in new_names] - res = ind.set_names(new_names2, inplace=True) - self.assertIsNone(res) - self.assertEqual(ind.names, new_names2) - - # set names for specific level (# GH7792) - ind = self.index.set_names(new_names[0], level=0) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, [new_names[0], self.index_names[1]]) - - res = ind.set_names(new_names2[0], level=0, inplace=True) - self.assertIsNone(res) - 
self.assertEqual(ind.names, [new_names2[0], self.index_names[1]]) - - # set names for multiple levels - ind = self.index.set_names(new_names, level=[0, 1]) - self.assertEqual(self.index.names, self.index_names) - self.assertEqual(ind.names, new_names) - - res = ind.set_names(new_names2, level=[0, 1], inplace=True) - self.assertIsNone(res) - self.assertEqual(ind.names, new_names2) - - def test_set_levels(self): - # side note - you probably wouldn't want to use levels and labels - # directly like this - but it is possible. - levels = self.index.levels - new_levels = [[lev + 'a' for lev in level] for level in levels] - - def assert_matching(actual, expected): - # avoid specifying internal representation - # as much as possible - self.assertEqual(len(actual), len(expected)) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp) - assert_almost_equal(act, exp) - - # level changing [w/o mutation] - ind2 = self.index.set_levels(new_levels) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # level changing [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.levels, new_levels) - - # level changing specific level [w/o mutation] - ind2 = self.index.set_levels(new_levels[0], level=0) - assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(self.index.levels, levels) - - ind2 = self.index.set_levels(new_levels[1], level=1) - assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(self.index.levels, levels) - - # level changing multiple levels [w/o mutation] - ind2 = self.index.set_levels(new_levels, level=[0, 1]) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - # level changing specific level [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) - 
self.assertIsNone(inplace_return) - assert_matching(ind2.levels, [new_levels[0], levels[1]]) - assert_matching(self.index.levels, levels) - - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.levels, [levels[0], new_levels[1]]) - assert_matching(self.index.levels, levels) - - # level changing multiple levels [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, level=[0, 1], - inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.levels, new_levels) - assert_matching(self.index.levels, levels) - - def test_set_labels(self): - # side note - you probably wouldn't want to use levels and labels - # directly like this - but it is possible. - labels = self.index.labels - major_labels, minor_labels = labels - major_labels = [(x + 1) % 3 for x in major_labels] - minor_labels = [(x + 1) % 1 for x in minor_labels] - new_labels = [major_labels, minor_labels] - - def assert_matching(actual, expected): - # avoid specifying internal representation - # as much as possible - self.assertEqual(len(actual), len(expected)) - for act, exp in zip(actual, expected): - act = np.asarray(act) - exp = np.asarray(exp) - assert_almost_equal(act, exp) - - # label changing [w/o mutation] - ind2 = self.index.set_labels(new_labels) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.labels, new_labels) - - # label changing specific level [w/o mutation] - ind2 = self.index.set_labels(new_labels[0], level=0) - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(self.index.labels, labels) - - ind2 = self.index.set_labels(new_labels[1], level=1) - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - 
assert_matching(self.index.labels, labels) - - # label changing multiple levels [w/o mutation] - ind2 = self.index.set_labels(new_labels, level=[0, 1]) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - # label changing specific level [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.labels, [new_labels[0], labels[1]]) - assert_matching(self.index.labels, labels) - - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.labels, [labels[0], new_labels[1]]) - assert_matching(self.index.labels, labels) - - # label changing multiple levels [w/ mutation] - ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, level=[0, 1], - inplace=True) - self.assertIsNone(inplace_return) - assert_matching(ind2.labels, new_labels) - assert_matching(self.index.labels, labels) - - def test_set_levels_labels_names_bad_input(self): - levels, labels = self.index.levels, self.index.labels - names = self.index.names - - with tm.assertRaisesRegexp(ValueError, 'Length of levels'): - self.index.set_levels([levels[0]]) - - with tm.assertRaisesRegexp(ValueError, 'Length of labels'): - self.index.set_labels([labels[0]]) - - with tm.assertRaisesRegexp(ValueError, 'Length of names'): - self.index.set_names([names[0]]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): - self.index.set_levels(levels[0]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): - self.index.set_labels(labels[0]) - - # shouldn't scalar data error, instead should demand list-like - with tm.assertRaisesRegexp(TypeError, 'list-like'): - self.index.set_names(names[0]) - - # should have equal 
lengths - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): - self.index.set_levels(levels[0], level=[0, 1]) - - with tm.assertRaisesRegexp(TypeError, 'list-like'): - self.index.set_levels(levels, level=0) - - # should have equal lengths - with tm.assertRaisesRegexp(TypeError, 'list of lists-like'): - self.index.set_labels(labels[0], level=[0, 1]) - - with tm.assertRaisesRegexp(TypeError, 'list-like'): - self.index.set_labels(labels, level=0) - - # should have equal lengths - with tm.assertRaisesRegexp(ValueError, 'Length of names'): - self.index.set_names(names[0], level=[0, 1]) - - with tm.assertRaisesRegexp(TypeError, 'string'): - self.index.set_names(names, level=0) - - def test_metadata_immutable(self): - levels, labels = self.index.levels, self.index.labels - # shouldn't be able to set at either the top level or base level - mutable_regex = re.compile('does not support mutable operations') - with assertRaisesRegexp(TypeError, mutable_regex): - levels[0] = levels[0] - with assertRaisesRegexp(TypeError, mutable_regex): - levels[0][0] = levels[0][0] - # ditto for labels - with assertRaisesRegexp(TypeError, mutable_regex): - labels[0] = labels[0] - with assertRaisesRegexp(TypeError, mutable_regex): - labels[0][0] = labels[0][0] - # and for names - names = self.index.names - with assertRaisesRegexp(TypeError, mutable_regex): - names[0] = names[0] - - def test_inplace_mutation_resets_values(self): - levels = [['a', 'b', 'c'], [4]] - levels2 = [[1, 2, 3], ['a']] - labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] - mi1 = MultiIndex(levels=levels, labels=labels) - mi2 = MultiIndex(levels=levels2, labels=labels) - vals = mi1.values.copy() - vals2 = mi2.values.copy() - self.assertIsNotNone(mi1._tuples) - - # make sure level setting works - new_vals = mi1.set_levels(levels2).values - assert_almost_equal(vals2, new_vals) - # non-inplace doesn't kill _tuples [implementation detail] - assert_almost_equal(mi1._tuples, vals) - # and values is still same too - 
assert_almost_equal(mi1.values, vals) - - # inplace should kill _tuples - mi1.set_levels(levels2, inplace=True) - assert_almost_equal(mi1.values, vals2) - - # make sure label setting works too - labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] - exp_values = np.empty((6, ), dtype=object) - exp_values[:] = [(long(1), 'a')] * 6 - # must be 1d array of tuples - self.assertEqual(exp_values.shape, (6, )) - new_values = mi2.set_labels(labels2).values - # not inplace shouldn't change - assert_almost_equal(mi2._tuples, vals2) - # should have correct values - assert_almost_equal(exp_values, new_values) - - # and again setting inplace should kill _tuples, etc - mi2.set_labels(labels2, inplace=True) - assert_almost_equal(mi2.values, new_values) - - def test_copy_in_constructor(self): - levels = np.array(["a", "b", "c"]) - labels = np.array([1, 1, 2, 0, 0, 1, 1]) - val = labels[0] - mi = MultiIndex(levels=[levels, levels], labels=[labels, labels], - copy=True) - self.assertEqual(mi.labels[0][0], val) - labels[0] = 15 - self.assertEqual(mi.labels[0][0], val) - val = levels[0] - levels[0] = "PANDA" - self.assertEqual(mi.levels[0][0], val) - - def test_set_value_keeps_names(self): - # motivating example from #3742 - lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] - lev2 = ['1', '2', '3'] * 2 - idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) - df = pd.DataFrame( - np.random.randn(6, 4), - columns=['one', 'two', 'three', 'four'], - index=idx) - df = df.sortlevel() - self.assertIsNone(df.is_copy) - self.assertEqual(df.index.names, ('Name', 'Number')) - df = df.set_value(('grethe', '4'), 'one', 99.34) - self.assertIsNone(df.is_copy) - self.assertEqual(df.index.names, ('Name', 'Number')) - - def test_names(self): - - # names are assigned in __init__ - names = self.index_names - level_names = [level.name for level in self.index.levels] - self.assertEqual(names, level_names) - - # setting bad names on existing - index = self.index - 
assertRaisesRegexp(ValueError, "^Length of names", setattr, index, - "names", list(index.names) + ["third"]) - assertRaisesRegexp(ValueError, "^Length of names", setattr, index, - "names", []) - - # initializing with bad names (should always be equivalent) - major_axis, minor_axis = self.index.levels - major_labels, minor_labels = self.index.labels - assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first']) - assertRaisesRegexp(ValueError, "^Length of names", MultiIndex, - levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=['first', 'second', 'third']) - - # names are assigned - index.names = ["a", "b"] - ind_names = list(index.names) - level_names = [level.name for level in index.levels] - self.assertEqual(ind_names, level_names) - - def test_reference_duplicate_name(self): - idx = MultiIndex.from_tuples( - [('a', 'b'), ('c', 'd')], names=['x', 'x']) - self.assertTrue(idx._reference_duplicate_name('x')) - - idx = MultiIndex.from_tuples( - [('a', 'b'), ('c', 'd')], names=['x', 'y']) - self.assertFalse(idx._reference_duplicate_name('x')) - - def test_astype(self): - expected = self.index.copy() - actual = self.index.astype('O') - assert_copy(actual.levels, expected.levels) - assert_copy(actual.labels, expected.labels) - self.check_level_names(actual, expected.names) - - with assertRaisesRegexp(TypeError, "^Setting.*dtype.*object"): - self.index.astype(np.dtype(int)) - - def test_constructor_single_level(self): - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - tm.assertIsInstance(single_level, Index) - self.assertNotIsInstance(single_level, MultiIndex) - self.assertEqual(single_level.name, 'first') - - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]]) - self.assertIsNone(single_level.name) - - def test_constructor_no_levels(self): - 
assertRaisesRegexp(ValueError, "non-zero number of levels/labels", - MultiIndex, levels=[], labels=[]) - both_re = re.compile('Must pass both levels and labels') - with tm.assertRaisesRegexp(TypeError, both_re): - MultiIndex(levels=[]) - with tm.assertRaisesRegexp(TypeError, both_re): - MultiIndex(labels=[]) - - def test_constructor_mismatched_label_levels(self): - labels = [np.array([1]), np.array([2]), np.array([3])] - levels = ["a"] - assertRaisesRegexp(ValueError, "Length of levels and labels must be" - " the same", MultiIndex, levels=levels, - labels=labels) - length_error = re.compile('>= length of level') - label_error = re.compile(r'Unequal label lengths: \[4, 2\]') - - # important to check that it's looking at the right thing. - with tm.assertRaisesRegexp(ValueError, length_error): - MultiIndex(levels=[['a'], ['b']], - labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) - - with tm.assertRaisesRegexp(ValueError, label_error): - MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) - - # external API - with tm.assertRaisesRegexp(ValueError, length_error): - self.index.copy().set_levels([['a'], ['b']]) - - with tm.assertRaisesRegexp(ValueError, label_error): - self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) - - # deprecated properties - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - - with tm.assertRaisesRegexp(ValueError, length_error): - self.index.copy().levels = [['a'], ['b']] - - with tm.assertRaisesRegexp(ValueError, label_error): - self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] - - def assert_multiindex_copied(self, copy, original): - # levels shoudl be (at least, shallow copied) - assert_copy(copy.levels, original.levels) - - assert_almost_equal(copy.labels, original.labels) - - # labels doesn't matter which way copied - assert_almost_equal(copy.labels, original.labels) - self.assertIsNot(copy.labels, original.labels) - - # names doesn't matter which way copied - self.assertEqual(copy.names, original.names) - 
self.assertIsNot(copy.names, original.names) - - # sort order should be copied - self.assertEqual(copy.sortorder, original.sortorder) - - def test_copy(self): - i_copy = self.index.copy() - - self.assert_multiindex_copied(i_copy, self.index) - - def test_shallow_copy(self): - i_copy = self.index._shallow_copy() - - self.assert_multiindex_copied(i_copy, self.index) - - def test_view(self): - i_view = self.index.view() - - self.assert_multiindex_copied(i_view, self.index) - - def check_level_names(self, index, names): - self.assertEqual([level.name for level in index.levels], list(names)) - - def test_changing_names(self): - - # names should be applied to levels - level_names = [level.name for level in self.index.levels] - self.check_level_names(self.index, self.index.names) - - view = self.index.view() - copy = self.index.copy() - shallow_copy = self.index._shallow_copy() - - # changing names should change level names on object - new_names = [name + "a" for name in self.index.names] - self.index.names = new_names - self.check_level_names(self.index, new_names) - - # but not on copies - self.check_level_names(view, level_names) - self.check_level_names(copy, level_names) - self.check_level_names(shallow_copy, level_names) - - # and copies shouldn't change original - shallow_copy.names = [name + "c" for name in shallow_copy.names] - self.check_level_names(self.index, new_names) - - def test_duplicate_names(self): - self.index.names = ['foo', 'foo'] - assertRaisesRegexp(KeyError, 'Level foo not found', - self.index._get_level_number, 'foo') - - def test_get_level_number_integer(self): - self.index.names = [1, 0] - self.assertEqual(self.index._get_level_number(1), 0) - self.assertEqual(self.index._get_level_number(0), 1) - self.assertRaises(IndexError, self.index._get_level_number, 2) - assertRaisesRegexp(KeyError, 'Level fourth not found', - self.index._get_level_number, 'fourth') - - def test_from_arrays(self): - arrays = [] - for lev, lab in zip(self.index.levels, 
self.index.labels): - arrays.append(np.asarray(lev).take(lab)) - - result = MultiIndex.from_arrays(arrays) - self.assertEqual(list(result), list(self.index)) - - # infer correctly - result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], - ['a', 'b']]) - self.assertTrue(result.levels[0].equals(Index([Timestamp('20130101') - ]))) - self.assertTrue(result.levels[1].equals(Index(['a', 'b']))) - - def test_from_product(self): - - first = ['foo', 'bar', 'buz'] - second = ['a', 'b', 'c'] - names = ['first', 'second'] - result = MultiIndex.from_product([first, second], names=names) - - tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), - ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), - ('buz', 'c')] - expected = MultiIndex.from_tuples(tuples, names=names) - - tm.assert_numpy_array_equal(result, expected) - self.assertEqual(result.names, names) - - def test_from_product_datetimeindex(self): - dt_index = date_range('2000-01-01', periods=2) - mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = pd.lib.list_to_object_array([(1, pd.Timestamp( - '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( - '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) - tm.assert_numpy_array_equal(mi.values, etalon) - - def test_values_boxed(self): - tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), - (3, pd.Timestamp('2000-01-03')), - (1, pd.Timestamp('2000-01-04')), - (2, pd.Timestamp('2000-01-02')), - (3, pd.Timestamp('2000-01-03'))] - mi = pd.MultiIndex.from_tuples(tuples) - tm.assert_numpy_array_equal(mi.values, - pd.lib.list_to_object_array(tuples)) - # Check that code branches for boxed values produce identical results - tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) - - def test_append(self): - result = self.index[:3].append(self.index[3:]) - self.assertTrue(result.equals(self.index)) - - foos = [self.index[:1], self.index[1:3], self.index[3:]] - result = foos[0].append(foos[1:]) - 
self.assertTrue(result.equals(self.index)) - - # empty - result = self.index.append([]) - self.assertTrue(result.equals(self.index)) - - def test_get_level_values(self): - result = self.index.get_level_values(0) - expected = ['foo', 'foo', 'bar', 'baz', 'qux', 'qux'] - tm.assert_numpy_array_equal(result, expected) - - self.assertEqual(result.name, 'first') - - result = self.index.get_level_values('first') - expected = self.index.get_level_values(0) - tm.assert_numpy_array_equal(result, expected) - - # GH 10460 - index = MultiIndex(levels=[CategoricalIndex( - ['A', 'B']), CategoricalIndex([1, 2, 3])], labels=[np.array( - [0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])]) - exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) - self.assert_index_equal(index.get_level_values(0), exp) - exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) - self.assert_index_equal(index.get_level_values(1), exp) - - def test_get_level_values_na(self): - arrays = [['a', 'b', 'b'], [1, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(1) - expected = [1, np.nan, 2] - tm.assert_numpy_array_equal(values.values.astype(float), expected) - - arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]] - index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(1) - expected = [np.nan, np.nan, 2] - tm.assert_numpy_array_equal(values.values.astype(float), expected) - - arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]] - index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(0) - expected = [np.nan, np.nan, np.nan] - tm.assert_numpy_array_equal(values.values.astype(float), expected) - values = index.get_level_values(1) - expected = np.array(['a', np.nan, 1], dtype=object) - tm.assert_numpy_array_equal(values.values, expected) - - arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] - index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(1) - expected = pd.DatetimeIndex([0, 1, pd.NaT]) - 
tm.assert_numpy_array_equal(values.values, expected.values) - - arrays = [[], []] - index = pd.MultiIndex.from_arrays(arrays) - values = index.get_level_values(0) - self.assertEqual(values.shape, (0, )) - - def test_reorder_levels(self): - # this blows up - assertRaisesRegexp(IndexError, '^Too many levels', - self.index.reorder_levels, [2, 1, 0]) - - def test_nlevels(self): - self.assertEqual(self.index.nlevels, 2) - - def test_iter(self): - result = list(self.index) - expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] - self.assertEqual(result, expected) - - def test_legacy_pickle(self): - if PY3: - raise nose.SkipTest("testing for legacy pickles not " - "support on py3") - - path = tm.get_data_path('multiindex_v1.pickle') - obj = pd.read_pickle(path) - - obj2 = MultiIndex.from_tuples(obj.values) - self.assertTrue(obj.equals(obj2)) - - res = obj.get_indexer(obj) - exp = np.arange(len(obj)) - assert_almost_equal(res, exp) - - res = obj.get_indexer(obj2[::-1]) - exp = obj.get_indexer(obj[::-1]) - exp2 = obj2.get_indexer(obj2[::-1]) - assert_almost_equal(res, exp) - assert_almost_equal(exp, exp2) - - def test_legacy_v2_unpickle(self): - - # 0.7.3 -> 0.8.0 format manage - path = tm.get_data_path('mindex_073.pickle') - obj = pd.read_pickle(path) - - obj2 = MultiIndex.from_tuples(obj.values) - self.assertTrue(obj.equals(obj2)) - - res = obj.get_indexer(obj) - exp = np.arange(len(obj)) - assert_almost_equal(res, exp) - - res = obj.get_indexer(obj2[::-1]) - exp = obj.get_indexer(obj[::-1]) - exp2 = obj2.get_indexer(obj2[::-1]) - assert_almost_equal(res, exp) - assert_almost_equal(exp, exp2) - - def test_roundtrip_pickle_with_tz(self): - - # GH 8367 - # round-trip of timezone - index = MultiIndex.from_product( - [[1, 2], ['a', 'b'], date_range('20130101', periods=3, - tz='US/Eastern') - ], names=['one', 'two', 'three']) - unpickled = self.round_trip_pickle(index) - self.assertTrue(index.equal_levels(unpickled)) - - 
def test_from_tuples_index_values(self): - result = MultiIndex.from_tuples(self.index) - self.assertTrue((result.values == self.index.values).all()) - - def test_contains(self): - self.assertIn(('foo', 'two'), self.index) - self.assertNotIn(('bar', 'two'), self.index) - self.assertNotIn(None, self.index) - - def test_is_all_dates(self): - self.assertFalse(self.index.is_all_dates) - - def test_is_numeric(self): - # MultiIndex is never numeric - self.assertFalse(self.index.is_numeric()) - - def test_getitem(self): - # scalar - self.assertEqual(self.index[2], ('bar', 'one')) - - # slice - result = self.index[2:5] - expected = self.index[[2, 3, 4]] - self.assertTrue(result.equals(expected)) - - # boolean - result = self.index[[True, False, True, False, True, True]] - result2 = self.index[np.array([True, False, True, False, True, True])] - expected = self.index[[0, 2, 4, 5]] - self.assertTrue(result.equals(expected)) - self.assertTrue(result2.equals(expected)) - - def test_getitem_group_select(self): - sorted_idx, _ = self.index.sortlevel(0) - self.assertEqual(sorted_idx.get_loc('baz'), slice(3, 4)) - self.assertEqual(sorted_idx.get_loc('foo'), slice(0, 2)) - - def test_get_loc(self): - self.assertEqual(self.index.get_loc(('foo', 'two')), 1) - self.assertEqual(self.index.get_loc(('baz', 'two')), 3) - self.assertRaises(KeyError, self.index.get_loc, ('bar', 'two')) - self.assertRaises(KeyError, self.index.get_loc, 'quux') - - self.assertRaises(NotImplementedError, self.index.get_loc, 'foo', - method='nearest') - - # 3 levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - self.assertRaises(KeyError, index.get_loc, (1, 1)) - self.assertEqual(index.get_loc((2, 0)), slice(3, 5)) - - def test_get_loc_duplicates(self): - index = Index([2, 2, 2, 2]) - result = index.get_loc(2) - expected = slice(0, 4) - 
self.assertEqual(result, expected) - # self.assertRaises(Exception, index.get_loc, 2) - - index = Index(['c', 'a', 'a', 'b', 'b']) - rs = index.get_loc('c') - xp = 0 - assert (rs == xp) - - def test_get_loc_level(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - loc, new_index = index.get_loc_level((0, 1)) - expected = slice(1, 2) - exp_index = index[expected].droplevel(0).droplevel(0) - self.assertEqual(loc, expected) - self.assertTrue(new_index.equals(exp_index)) - - loc, new_index = index.get_loc_level((0, 1, 0)) - expected = 1 - self.assertEqual(loc, expected) - self.assertIsNone(new_index) - - self.assertRaises(KeyError, index.get_loc_level, (2, 2)) - - index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( - [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) - result, new_index = index.get_loc_level((2000, slice(None, None))) - expected = slice(None, None) - self.assertEqual(result, expected) - self.assertTrue(new_index.equals(index.droplevel(0))) - - def test_slice_locs(self): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - - slob = slice(*idx.slice_locs(df.index[5], df.index[15])) - sliced = stacked[slob] - expected = df[5:16].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30), - df.index[15] - timedelta(seconds=30))) - sliced = stacked[slob] - expected = df[6:15].stack() - tm.assert_almost_equal(sliced.values, expected.values) - - def test_slice_locs_with_type_mismatch(self): - df = tm.makeTimeDataFrame() - stacked = df.stack() - idx = stacked.index - assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, - (1, 3)) - assertRaisesRegexp(TypeError, '^Level type mismatch', idx.slice_locs, - df.index[5] + timedelta(seconds=30), (5, 2)) - df = tm.makeCustomDataframe(5, 5) 
- stacked = df.stack() - idx = stacked.index - with assertRaisesRegexp(TypeError, '^Level type mismatch'): - idx.slice_locs(timedelta(seconds=30)) - # TODO: Try creating a UnicodeDecodeError in exception message - with assertRaisesRegexp(TypeError, '^Level type mismatch'): - idx.slice_locs(df.index[1], (16, "a")) - - def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex" - " lexsort depth", index.slice_locs, (1, 0, 1), - (2, 1, 0)) - - # works - sorted_index, _ = index.sortlevel(0) - # should there be a test case here??? - sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) - - def test_slice_locs_partial(self): - sorted_idx, _ = self.index.sortlevel(0) - - result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one')) - self.assertEqual(result, (1, 5)) - - result = sorted_idx.slice_locs(None, ('qux', 'one')) - self.assertEqual(result, (0, 5)) - - result = sorted_idx.slice_locs(('foo', 'two'), None) - self.assertEqual(result, (1, len(sorted_idx))) - - result = sorted_idx.slice_locs('bar', 'baz') - self.assertEqual(result, (2, 4)) - - def test_slice_locs_not_contained(self): - # some searchsorted action - - index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], - labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], - [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) - - result = index.slice_locs((1, 0), (5, 2)) - self.assertEqual(result, (3, 6)) - - result = index.slice_locs(1, 5) - self.assertEqual(result, (3, 6)) - - result = index.slice_locs((2, 2), (5, 2)) - self.assertEqual(result, (3, 6)) - - result = index.slice_locs(2, 5) - self.assertEqual(result, (3, 6)) - - result = index.slice_locs((1, 0), (6, 3)) - self.assertEqual(result, (3, 8)) - - result = index.slice_locs(-1, 10) - self.assertEqual(result, (0, len(index))) - - def 
test_consistency(self): - # need to construct an overflow - major_axis = lrange(70000) - minor_axis = lrange(10) - - major_labels = np.arange(70000) - minor_labels = np.repeat(lrange(10), 7000) - - # the fact that is works means it's consistent - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - # inconsistent - major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - self.assertFalse(index.is_unique) - - def test_truncate(self): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - - result = index.truncate(before=1) - self.assertNotIn('foo', result.levels[0]) - self.assertIn(1, result.levels[0]) - - result = index.truncate(after=1) - self.assertNotIn(2, result.levels[0]) - self.assertIn(1, result.levels[0]) - - result = index.truncate(before=1, after=2) - self.assertEqual(len(result.levels[0]), 2) - - # after < before - self.assertRaises(ValueError, index.truncate, 3, 1) - - def test_get_indexer(self): - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - idx1 = index[:5] - idx2 = index[[1, 3, 5]] - - r1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, [1, 3, -1]) - - r1 = idx2.get_indexer(idx1, method='pad') - e1 = [-1, 0, 0, 1, 1] - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='pad') - assert_almost_equal(r2, e1[::-1]) - - rffill1 = idx2.get_indexer(idx1, method='ffill') - assert_almost_equal(r1, rffill1) - - r1 = idx2.get_indexer(idx1, 
method='backfill') - e1 = [0, 0, 1, 1, 2] - assert_almost_equal(r1, e1) - - r2 = idx2.get_indexer(idx1[::-1], method='backfill') - assert_almost_equal(r2, e1[::-1]) - - rbfill1 = idx2.get_indexer(idx1, method='bfill') - assert_almost_equal(r1, rbfill1) - - # pass non-MultiIndex - r1 = idx1.get_indexer(idx2._tuple_index) - rexp1 = idx1.get_indexer(idx2) - assert_almost_equal(r1, rexp1) - - r1 = idx1.get_indexer([1, 2, 3]) - self.assertTrue((r1 == [-1, -1, -1]).all()) - - # create index with duplicates - idx1 = Index(lrange(10) + lrange(10)) - idx2 = Index(lrange(20)) - assertRaisesRegexp(InvalidIndexError, "Reindexing only valid with" - " uniquely valued Index objects", idx1.get_indexer, - idx2) - - def test_get_indexer_nearest(self): - midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) - with tm.assertRaises(NotImplementedError): - midx.get_indexer(['a'], method='nearest') - with tm.assertRaises(NotImplementedError): - midx.get_indexer(['a'], method='pad', tolerance=2) - - def test_format(self): - self.index.format() - self.index[:0].format() - - def test_format_integer_names(self): - index = MultiIndex(levels=[[0, 1], [0, 1]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) - index.format(names=True) - - def test_format_sparse_display(self): - index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], - labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], - [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) - - result = index.format() - self.assertEqual(result[3], '1 0 0 0') - - def test_format_sparse_config(self): - warn_filters = warnings.filters - warnings.filterwarnings('ignore', category=FutureWarning, - module=".*format") - # GH1538 - pd.set_option('display.multi_sparse', False) - - result = self.index.format() - self.assertEqual(result[1], 'foo two') - - self.reset_display_options() - - warnings.filters = warn_filters - - def test_to_hierarchical(self): - index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two')]) - result = 
index.to_hierarchical(3) - expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], - [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) - tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) - - # K > 1 - result = index.to_hierarchical(3, 2) - expected = MultiIndex(levels=[[1, 2], ['one', 'two']], - labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1], - [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]]) - tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) - - # non-sorted - index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'), - (2, 'a'), (2, 'b')], - names=['N1', 'N2']) - - result = index.to_hierarchical(2) - expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), - (1, 'b'), - (2, 'a'), (2, 'a'), - (2, 'b'), (2, 'b')], - names=['N1', 'N2']) - tm.assert_index_equal(result, expected) - self.assertEqual(result.names, index.names) - - def test_bounds(self): - self.index._bounds - - def test_equals(self): - self.assertTrue(self.index.equals(self.index)) - self.assertTrue(self.index.equal_levels(self.index)) - - self.assertFalse(self.index.equals(self.index[:-1])) - - self.assertTrue(self.index.equals(self.index._tuple_index)) - - # different number of levels - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) - self.assertFalse(index.equals(index2)) - self.assertFalse(index.equal_levels(index2)) - - # levels are different - major_axis = Index(lrange(4)) - minor_axis = Index(lrange(2)) - - major_labels = np.array([0, 0, 1, 2, 2, 3]) - minor_labels = np.array([0, 1, 0, 0, 1, 0]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) - 
self.assertFalse(self.index.equal_levels(index)) - - # some of the labels are different - major_axis = Index(['foo', 'bar', 'baz', 'qux']) - minor_axis = Index(['one', 'two']) - - major_labels = np.array([0, 0, 2, 2, 3, 3]) - minor_labels = np.array([0, 1, 0, 1, 0, 1]) - - index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels]) - self.assertFalse(self.index.equals(index)) - - def test_identical(self): - mi = self.index.copy() - mi2 = self.index.copy() - self.assertTrue(mi.identical(mi2)) - - mi = mi.set_names(['new1', 'new2']) - self.assertTrue(mi.equals(mi2)) - self.assertFalse(mi.identical(mi2)) - - mi2 = mi2.set_names(['new1', 'new2']) - self.assertTrue(mi.identical(mi2)) - - mi3 = Index(mi.tolist(), names=mi.names) - mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False) - self.assertTrue(mi.identical(mi3)) - self.assertFalse(mi.identical(mi4)) - self.assertTrue(mi.equals(mi4)) - - def test_is_(self): - - mi = MultiIndex.from_tuples(lzip(range(10), range(10))) - self.assertTrue(mi.is_(mi)) - self.assertTrue(mi.is_(mi.view())) - self.assertTrue(mi.is_(mi.view().view().view().view())) - mi2 = mi.view() - # names are metadata, they don't change id - mi2.names = ["A", "B"] - self.assertTrue(mi2.is_(mi)) - self.assertTrue(mi.is_(mi2)) - - self.assertTrue(mi.is_(mi.set_names(["C", "D"]))) - mi2 = mi.view() - mi2.set_names(["E", "F"], inplace=True) - self.assertTrue(mi.is_(mi2)) - # levels are inherent properties, they change identity - mi3 = mi2.set_levels([lrange(10), lrange(10)]) - self.assertFalse(mi3.is_(mi2)) - # shouldn't change - self.assertTrue(mi2.is_(mi)) - mi4 = mi3.view() - mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True) - self.assertFalse(mi4.is_(mi3)) - mi5 = mi.view() - mi5.set_levels(mi5.levels, inplace=True) - self.assertFalse(mi5.is_(mi)) - - def test_union(self): - piece1 = self.index[:5][::-1] - piece2 = self.index[3:] - - the_union = piece1 | piece2 - - tups = 
sorted(self.index._tuple_index) - expected = MultiIndex.from_tuples(tups) - - self.assertTrue(the_union.equals(expected)) - - # corner case, pass self or empty thing: - the_union = self.index.union(self.index) - self.assertIs(the_union, self.index) - - the_union = self.index.union(self.index[:0]) - self.assertIs(the_union, self.index) - - # won't work in python 3 - # tuples = self.index._tuple_index - # result = self.index[:4] | tuples[4:] - # self.assertTrue(result.equals(tuples)) - - # not valid for python 3 - # def test_union_with_regular_index(self): - # other = Index(['A', 'B', 'C']) - - # result = other.union(self.index) - # self.assertIn(('foo', 'one'), result) - # self.assertIn('B', result) - - # result2 = self.index.union(other) - # self.assertTrue(result.equals(result2)) - - def test_intersection(self): - piece1 = self.index[:5][::-1] - piece2 = self.index[3:] - - the_int = piece1 & piece2 - tups = sorted(self.index[3:5]._tuple_index) - expected = MultiIndex.from_tuples(tups) - self.assertTrue(the_int.equals(expected)) - - # corner case, pass self - the_int = self.index.intersection(self.index) - self.assertIs(the_int, self.index) - - # empty intersection: disjoint - empty = self.index[:2] & self.index[2:] - expected = self.index[:0] - self.assertTrue(empty.equals(expected)) - - # can't do in python 3 - # tuples = self.index._tuple_index - # result = self.index & tuples - # self.assertTrue(result.equals(tuples)) - - def test_difference(self): - - first = self.index - result = first.difference(self.index[-3:]) - - # - API change GH 8226 - with tm.assert_produces_warning(): - first - self.index[-3:] - with tm.assert_produces_warning(): - self.index[-3:] - first - with tm.assert_produces_warning(): - self.index[-3:] - first.tolist() - - self.assertRaises(TypeError, lambda: first.tolist() - self.index[-3:]) - - expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), - sortorder=0, - names=self.index.names) - - tm.assertIsInstance(result, MultiIndex) 
- self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) - - # empty difference: reflexive - result = self.index.difference(self.index) - expected = self.index[:0] - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) - - # empty difference: superset - result = self.index[-3:].difference(self.index) - expected = self.index[:0] - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) - - # empty difference: degenerate - result = self.index[:0].difference(self.index) - expected = self.index[:0] - self.assertTrue(result.equals(expected)) - self.assertEqual(result.names, self.index.names) - - # names not the same - chunklet = self.index[-3:] - chunklet.names = ['foo', 'baz'] - result = first.difference(chunklet) - self.assertEqual(result.names, (None, None)) - - # empty, but non-equal - result = self.index.difference(self.index.sortlevel(1)[0]) - self.assertEqual(len(result), 0) - - # raise Exception called with non-MultiIndex - result = first.difference(first._tuple_index) - self.assertTrue(result.equals(first[:0])) - - # name from empty array - result = first.difference([]) - self.assertTrue(first.equals(result)) - self.assertEqual(first.names, result.names) - - # name from non-empty array - result = first.difference([('foo', 'one')]) - expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( - 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) - expected.names = first.names - self.assertEqual(first.names, result.names) - assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list" - " of tuples", first.difference, [1, 2, 3, 4, 5]) - - def test_from_tuples(self): - assertRaisesRegexp(TypeError, 'Cannot infer number of levels from' - ' empty list', MultiIndex.from_tuples, []) - - idx = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b']) - self.assertEqual(len(idx), 2) - - def test_argsort(self): - result = self.index.argsort() - 
expected = self.index._tuple_index.argsort() - tm.assert_numpy_array_equal(result, expected) - - def test_sortlevel(self): - import random - - tuples = list(self.index) - random.shuffle(tuples) - - index = MultiIndex.from_tuples(tuples) - - sorted_idx, _ = index.sortlevel(0) - expected = MultiIndex.from_tuples(sorted(tuples)) - self.assertTrue(sorted_idx.equals(expected)) - - sorted_idx, _ = index.sortlevel(0, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) - - sorted_idx, _ = index.sortlevel(1) - by1 = sorted(tuples, key=lambda x: (x[1], x[0])) - expected = MultiIndex.from_tuples(by1) - self.assertTrue(sorted_idx.equals(expected)) - - sorted_idx, _ = index.sortlevel(1, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) - - def test_sortlevel_not_sort_remaining(self): - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - sorted_idx, _ = mi.sortlevel('A', sort_remaining=False) - self.assertTrue(sorted_idx.equals(mi)) - - def test_sortlevel_deterministic(self): - tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'), - ('foo', 'one'), ('baz', 'two'), ('qux', 'one')] - - index = MultiIndex.from_tuples(tuples) - - sorted_idx, _ = index.sortlevel(0) - expected = MultiIndex.from_tuples(sorted(tuples)) - self.assertTrue(sorted_idx.equals(expected)) - - sorted_idx, _ = index.sortlevel(0, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) - - sorted_idx, _ = index.sortlevel(1) - by1 = sorted(tuples, key=lambda x: (x[1], x[0])) - expected = MultiIndex.from_tuples(by1) - self.assertTrue(sorted_idx.equals(expected)) - - sorted_idx, _ = index.sortlevel(1, ascending=False) - self.assertTrue(sorted_idx.equals(expected[::-1])) - - def test_dims(self): - pass - - def test_drop(self): - dropped = self.index.drop([('foo', 'two'), ('qux', 'one')]) - - index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')]) - dropped2 = self.index.drop(index) - - expected = self.index[[0, 2, 3, 5]] - 
self.assert_index_equal(dropped, expected) - self.assert_index_equal(dropped2, expected) - - dropped = self.index.drop(['bar']) - expected = self.index[[0, 1, 3, 4, 5]] - self.assert_index_equal(dropped, expected) - - dropped = self.index.drop('foo') - expected = self.index[[2, 3, 4, 5]] - self.assert_index_equal(dropped, expected) - - index = MultiIndex.from_tuples([('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, [('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, index) - self.assertRaises(KeyError, self.index.drop, ['foo', 'two']) - - # partially correct argument - mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')]) - self.assertRaises(KeyError, self.index.drop, mixed_index) - - # error='ignore' - dropped = self.index.drop(index, errors='ignore') - expected = self.index[[0, 1, 2, 3, 4, 5]] - self.assert_index_equal(dropped, expected) - - dropped = self.index.drop(mixed_index, errors='ignore') - expected = self.index[[0, 1, 2, 3, 5]] - self.assert_index_equal(dropped, expected) - - dropped = self.index.drop(['foo', 'two'], errors='ignore') - expected = self.index[[2, 3, 4, 5]] - self.assert_index_equal(dropped, expected) - - # mixed partial / full drop - dropped = self.index.drop(['foo', ('qux', 'one')]) - expected = self.index[[2, 3, 5]] - self.assert_index_equal(dropped, expected) - - # mixed partial / full drop / error='ignore' - mixed_index = ['foo', ('qux', 'one'), 'two'] - self.assertRaises(KeyError, self.index.drop, mixed_index) - dropped = self.index.drop(mixed_index, errors='ignore') - expected = self.index[[2, 3, 5]] - self.assert_index_equal(dropped, expected) - - def test_droplevel_with_names(self): - index = self.index[self.index.get_loc('foo')] - dropped = index.droplevel(0) - self.assertEqual(dropped.name, 'second') - - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 
1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) - dropped = index.droplevel(0) - self.assertEqual(dropped.names, ('two', 'three')) - - dropped = index.droplevel('two') - expected = index.droplevel(1) - self.assertTrue(dropped.equals(expected)) - - def test_droplevel_multiple(self): - index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( - lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( - [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) - - dropped = index[:2].droplevel(['three', 'one']) - expected = index[:2].droplevel(2).droplevel(0) - self.assertTrue(dropped.equals(expected)) - - def test_insert(self): - # key contained in all levels - new_index = self.index.insert(0, ('bar', 'two')) - self.assertTrue(new_index.equal_levels(self.index)) - self.assertEqual(new_index[0], ('bar', 'two')) - - # key not contained in all levels - new_index = self.index.insert(0, ('abc', 'three')) - tm.assert_numpy_array_equal(new_index.levels[0], - list(self.index.levels[0]) + ['abc']) - tm.assert_numpy_array_equal(new_index.levels[1], - list(self.index.levels[1]) + ['three']) - self.assertEqual(new_index[0], ('abc', 'three')) - - # key wrong length - assertRaisesRegexp(ValueError, "Item must have length equal to number" - " of levels", self.index.insert, 0, ('foo2', )) - - left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], - columns=['1st', '2nd', '3rd']) - left.set_index(['1st', '2nd'], inplace=True) - ts = left['3rd'].copy(deep=True) - - left.loc[('b', 'x'), '3rd'] = 2 - left.loc[('b', 'a'), '3rd'] = -1 - left.loc[('b', 'b'), '3rd'] = 3 - left.loc[('a', 'x'), '3rd'] = 4 - left.loc[('a', 'w'), '3rd'] = 5 - left.loc[('a', 'a'), '3rd'] = 6 - - ts.loc[('b', 'x')] = 2 - ts.loc['b', 'a'] = -1 - ts.loc[('b', 'b')] = 3 - ts.loc['a', 'x'] = 4 - ts.loc[('a', 'w')] = 5 - ts.loc['a', 'a'] = 6 - - right = pd.DataFrame([['a', 'b', 0], - ['b', 'd', 1], - ['b', 'x', 2], - ['b', 'a', -1], - ['b', 'b', 3], - 
['a', 'x', 4], - ['a', 'w', 5], - ['a', 'a', 6]], - columns=['1st', '2nd', '3rd']) - right.set_index(['1st', '2nd'], inplace=True) - # FIXME data types changes to float because - # of intermediate nan insertion; - tm.assert_frame_equal(left, right, check_dtype=False) - tm.assert_series_equal(ts, right['3rd']) - - # GH9250 - idx = [('test1', i) for i in range(5)] + \ - [('test2', i) for i in range(6)] + \ - [('test', 17), ('test', 18)] - - left = pd.Series(np.linspace(0, 10, 11), - pd.MultiIndex.from_tuples(idx[:-2])) - - left.loc[('test', 17)] = 11 - left.ix[('test', 18)] = 12 - - right = pd.Series(np.linspace(0, 12, 13), - pd.MultiIndex.from_tuples(idx)) - - tm.assert_series_equal(left, right) - - def test_take_preserve_name(self): - taken = self.index.take([3, 0, 1]) - self.assertEqual(taken.names, self.index.names) - - def test_join_level(self): - def _check_how(other, how): - join_index, lidx, ridx = other.join(self.index, how=how, - level='second', - return_indexers=True) - - exp_level = other.join(self.index.levels[1], how=how) - self.assertTrue(join_index.levels[0].equals(self.index.levels[0])) - self.assertTrue(join_index.levels[1].equals(exp_level)) - - # pare down levels - mask = np.array( - [x[1] in exp_level for x in self.index], dtype=bool) - exp_values = self.index.values[mask] - tm.assert_numpy_array_equal(join_index.values, exp_values) - - if how in ('outer', 'inner'): - join_index2, ridx2, lidx2 = \ - self.index.join(other, how=how, level='second', - return_indexers=True) - - self.assertTrue(join_index.equals(join_index2)) - tm.assert_numpy_array_equal(lidx, lidx2) - tm.assert_numpy_array_equal(ridx, ridx2) - tm.assert_numpy_array_equal(join_index2.values, exp_values) - - def _check_all(other): - _check_how(other, 'outer') - _check_how(other, 'inner') - _check_how(other, 'left') - _check_how(other, 'right') - - _check_all(Index(['three', 'one', 'two'])) - _check_all(Index(['one'])) - _check_all(Index(['one', 'three'])) - - # some corner cases - idx 
= Index(['three', 'one', 'two']) - result = idx.join(self.index, level='second') - tm.assertIsInstance(result, MultiIndex) - - assertRaisesRegexp(TypeError, "Join.*MultiIndex.*ambiguous", - self.index.join, self.index, level=1) - - def test_join_self(self): - kinds = 'outer', 'inner', 'left', 'right' - for kind in kinds: - res = self.index - joined = res.join(res, how=kind) - self.assertIs(res, joined) - - def test_join_multi(self): - # GH 10665 - midx = pd.MultiIndex.from_product( - [np.arange(4), np.arange(4)], names=['a', 'b']) - idx = pd.Index([1, 2, 5], name='b') - - # inner - jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) - exp_idx = pd.MultiIndex.from_product( - [np.arange(4), [1, 2]], names=['a', 'b']) - exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14]) - exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1]) - self.assert_index_equal(jidx, exp_idx) - self.assert_numpy_array_equal(lidx, exp_lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) - # flip - jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True) - self.assert_index_equal(jidx, exp_idx) - self.assert_numpy_array_equal(lidx, exp_lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) - - # keep MultiIndex - jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) - exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, - 1, -1]) - self.assert_index_equal(jidx, midx) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) - # flip - jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True) - self.assert_index_equal(jidx, midx) - self.assertIsNone(lidx) - self.assert_numpy_array_equal(ridx, exp_ridx) - - def test_reindex(self): - result, indexer = self.index.reindex(list(self.index[:4])) - tm.assertIsInstance(result, MultiIndex) - self.check_level_names(result, self.index[:4].names) - - result, indexer = self.index.reindex(list(self.index)) - tm.assertIsInstance(result, MultiIndex) - self.assertIsNone(indexer) - 
self.check_level_names(result, self.index.names) - - def test_reindex_level(self): - idx = Index(['one']) - - target, indexer = self.index.reindex(idx, level='second') - target2, indexer2 = idx.reindex(self.index, level='second') - - exp_index = self.index.join(idx, level='second', how='right') - exp_index2 = self.index.join(idx, level='second', how='left') - - self.assertTrue(target.equals(exp_index)) - exp_indexer = np.array([0, 2, 4]) - tm.assert_numpy_array_equal(indexer, exp_indexer) - - self.assertTrue(target2.equals(exp_index2)) - exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) - tm.assert_numpy_array_equal(indexer2, exp_indexer2) - - assertRaisesRegexp(TypeError, "Fill method not supported", - self.index.reindex, self.index, method='pad', - level='second') - - assertRaisesRegexp(TypeError, "Fill method not supported", idx.reindex, - idx, method='bfill', level='first') - - def test_duplicates(self): - self.assertFalse(self.index.has_duplicates) - self.assertTrue(self.index.append(self.index).has_duplicates) - - index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ - [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - self.assertTrue(index.has_duplicates) - - # GH 9075 - t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), - (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), - (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), - (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), - (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), - (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), - (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), - (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), - (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), - (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), - (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), - (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), - (u('x'), u('out'), u('z'), 28, 
u('y'), u('in'), u('z'), 114), - (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), - (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), - (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), - (u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), - (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] - - index = pd.MultiIndex.from_tuples(t) - self.assertFalse(index.has_duplicates) - - # handle int64 overflow if possible - def check(nlevels, with_nulls): - labels = np.tile(np.arange(500), 2) - level = np.arange(500) - - if with_nulls: # inject some null values - labels[500] = -1 # common nan value - labels = list(labels.copy() for i in range(nlevels)) - for i in range(nlevels): - labels[i][500 + i - nlevels // 2] = -1 - - labels += [np.array([-1, 1]).repeat(500)] - else: - labels = [labels] * nlevels + [np.arange(2).repeat(500)] - - levels = [level] * nlevels + [[0, 1]] - - # no dups - index = MultiIndex(levels=levels, labels=labels) - self.assertFalse(index.has_duplicates) - - # with a dup - if with_nulls: - f = lambda a: np.insert(a, 1000, a[0]) - labels = list(map(f, labels)) - index = MultiIndex(levels=levels, labels=labels) - else: - values = index.values.tolist() - index = MultiIndex.from_tuples(values + [values[0]]) - - self.assertTrue(index.has_duplicates) - - # no overflow - check(4, False) - check(4, True) - - # overflow possible - check(8, False) - check(8, True) - - # GH 9125 - n, k = 200, 5000 - levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)] - labels = [np.random.choice(n, k * n) for lev in levels] - mi = MultiIndex(levels=levels, labels=labels) - - for keep in ['first', 'last', False]: - left = mi.duplicated(keep=keep) - right = pd.lib.duplicated(mi.values, keep=keep) - tm.assert_numpy_array_equal(left, right) - - # GH5873 - for a in [101, 102]: - mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) - self.assertFalse(mi.has_duplicates) - self.assertEqual(mi.get_duplicates(), []) - 
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - 2, dtype='bool')) - - for n in range(1, 6): # 1st level shape - for m in range(1, 5): # 2nd level shape - # all possible unique combinations, including nan - lab = product(range(-1, n), range(-1, m)) - mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]], - labels=np.random.permutation(list(lab)).T) - self.assertEqual(len(mi), (n + 1) * (m + 1)) - self.assertFalse(mi.has_duplicates) - self.assertEqual(mi.get_duplicates(), []) - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( - len(mi), dtype='bool')) - - def test_duplicate_meta_data(self): - # GH 10115 - index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ - [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) - for idx in [index, - index.set_names([None, None]), - index.set_names([None, 'Num']), - index.set_names(['Upper', 'Num']), ]: - self.assertTrue(idx.has_duplicates) - self.assertEqual(idx.drop_duplicates().names, idx.names) - - def test_tolist(self): - result = self.index.tolist() - exp = list(self.index.values) - self.assertEqual(result, exp) - - def test_repr_with_unicode_data(self): - with pd.core.config.option_context("display.encoding", 'UTF-8'): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - index = pd.DataFrame(d).set_index(["a", "b"]).index - self.assertFalse("\\u" in repr(index) - ) # we don't want unicode-escaped - - def test_repr_roundtrip(self): - - mi = MultiIndex.from_product([list('ab'), range(3)], - names=['first', 'second']) - str(mi) - - if PY3: - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - else: - result = eval(repr(mi)) - # string coerces to unicode - tm.assert_index_equal(result, mi, exact=False) - self.assertEqual( - mi.get_level_values('first').inferred_type, 'string') - self.assertEqual( - result.get_level_values('first').inferred_type, 'unicode') - - mi_u = MultiIndex.from_product( - [list(u'ab'), range(3)], names=['first', 'second']) - result = eval(repr(mi_u)) - 
tm.assert_index_equal(result, mi_u, exact=True) - - # formatting - if PY3: - str(mi) - else: - compat.text_type(mi) - - # long format - mi = MultiIndex.from_product([list('abcdefg'), range(10)], - names=['first', 'second']) - result = str(mi) - - if PY3: - tm.assert_index_equal(eval(repr(mi)), mi, exact=True) - else: - result = eval(repr(mi)) - # string coerces to unicode - tm.assert_index_equal(result, mi, exact=False) - self.assertEqual( - mi.get_level_values('first').inferred_type, 'string') - self.assertEqual( - result.get_level_values('first').inferred_type, 'unicode') - - mi = MultiIndex.from_product( - [list(u'abcdefg'), range(10)], names=['first', 'second']) - result = eval(repr(mi_u)) - tm.assert_index_equal(result, mi_u, exact=True) - - def test_str(self): - # tested elsewhere - pass - - def test_unicode_string_with_unicode(self): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - idx = pd.DataFrame(d).set_index(["a", "b"]).index - - if PY3: - str(idx) - else: - compat.text_type(idx) - - def test_bytestring_with_unicode(self): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} - idx = pd.DataFrame(d).set_index(["a", "b"]).index - - if PY3: - bytes(idx) - else: - str(idx) - - def test_slice_keep_name(self): - x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')], - names=['x', 'y']) - self.assertEqual(x[1:].names, x.names) - - def test_isnull_behavior(self): - # should not segfault GH5123 - # NOTE: if MI representation changes, may make sense to allow - # isnull(MI) - with tm.assertRaises(NotImplementedError): - pd.isnull(self.index) - - def test_level_setting_resets_attributes(self): - ind = MultiIndex.from_arrays([ - ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] - ]) - assert ind.is_monotonic - ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], - inplace=True) - # if this fails, probably didn't reset the cache correctly. 
- assert not ind.is_monotonic - - def test_isin(self): - values = [('foo', 2), ('bar', 3), ('quux', 4)] - - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) - result = idx.isin(values) - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(result, expected) - - # empty, return dtype bool - idx = MultiIndex.from_arrays([[], []]) - result = idx.isin(values) - self.assertEqual(len(result), 0) - self.assertEqual(result.dtype, np.bool_) - - def test_isin_nan(self): - idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) - tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), - [False, False]) - tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), - [False, False]) - - def test_isin_level_kwarg(self): - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( - 4)]) - - vals_0 = ['foo', 'bar', 'quux'] - vals_1 = [2, 3, 10] - - expected = np.array([False, False, True, True]) - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0)) - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2)) - - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1)) - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1)) - - self.assertRaises(IndexError, idx.isin, vals_0, level=5) - self.assertRaises(IndexError, idx.isin, vals_0, level=-5) - - self.assertRaises(KeyError, idx.isin, vals_0, level=1.0) - self.assertRaises(KeyError, idx.isin, vals_1, level=-1.0) - self.assertRaises(KeyError, idx.isin, vals_1, level='A') - - idx.names = ['A', 'B'] - tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A')) - tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B')) - - self.assertRaises(KeyError, idx.isin, vals_1, level='C') - - def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): - # GH6552 - idx = self.index.copy() - target = idx.copy() - idx.names = target.names = [None, None] - - other_dtype = pd.MultiIndex.from_product([[1, 2], 
[3, 4]]) - - # list & ndarray cases - self.assertEqual(idx.reindex([])[0].names, [None, None]) - self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None]) - self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None]) - self.assertEqual(idx.reindex(target.values)[0].names, [None, None]) - self.assertEqual( - idx.reindex(other_dtype.tolist())[0].names, [None, None]) - self.assertEqual( - idx.reindex(other_dtype.values)[0].names, [None, None]) - - idx.names = ['foo', 'bar'] - self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar']) - self.assertEqual( - idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) - self.assertEqual( - idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) - - def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): - # GH7774 - idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']], - names=['foo', 'bar']) - self.assertEqual(idx.reindex([], level=0)[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex([], level=1)[0].names, ['foo', 'bar']) - - def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self): - # GH7774 - idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - self.assertEqual(idx.reindex([], level=0)[0].levels[0].dtype.type, - np.int64) - self.assertEqual(idx.reindex([], level=1)[0].levels[1].dtype.type, - np.object_) - - def test_groupby(self): - groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2])) - labels = self.index.get_values().tolist() - exp = {1: labels[:3], 2: labels[3:]} - tm.assert_dict_equal(groups, exp) - - # GH5620 - groups = self.index.groupby(self.index) - exp = dict((key, [key]) for key in self.index) - tm.assert_dict_equal(groups, exp) - - def test_index_name_retained(self): - # GH9857 - result = pd.DataFrame({'x': [1, 2, 6], - 
'y': [2, 2, 8], - 'z': [-5, 0, 5]}) - result = result.set_index('z') - result.loc[10] = [9, 10] - df_expected = pd.DataFrame({'x': [1, 2, 6, 9], - 'y': [2, 2, 8, 10], - 'z': [-5, 0, 5, 10]}) - df_expected = df_expected.set_index('z') - tm.assert_frame_equal(result, df_expected) - - def test_equals_operator(self): - # GH9785 - self.assertTrue((self.index == self.index).all()) - - -def test_get_combined_index(): - from pandas.core.index import _get_combined_index - result = _get_combined_index([]) - assert (result.equals(Index([]))) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/setup.py b/setup.py index 62d9062de1155..29c3f02013712 100755 --- a/setup.py +++ b/setup.py @@ -544,6 +544,7 @@ def pxd(name): 'pandas.computation', 'pandas.computation.tests', 'pandas.core', + 'pandas.indexes', 'pandas.io', 'pandas.rpy', 'pandas.sandbox', @@ -553,6 +554,7 @@ def pxd(name): 'pandas.util', 'pandas.tests', 'pandas.tests.frame', + 'pandas.tests.indexes', 'pandas.tests.test_msgpack', 'pandas.tools', 'pandas.tools.tests', @@ -580,6 +582,7 @@ def pxd(name): 'pandas.tools': ['tests/*.csv'], 'pandas.tests': ['data/*.pickle', 'data/*.csv'], + 'pandas.tests.indexes': ['data/*.pickle'], 'pandas.tseries.tests': ['data/*.pickle', 'data/*.csv'] }, From 49da0e4b7ce5fd5acbda41173f9750a77317e2be Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sat, 23 Jan 2016 22:56:10 -0800 Subject: [PATCH 4/8] Fix pickling --- pandas/indexes/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index 8482cf325b47d..c41e30aef1dba 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -1,4 +1,4 @@ -from .base import (Index, # noqa +from .base import (Index, _new_Index, # noqa _ensure_index, _get_na_value, InvalidIndexError) from .category import CategoricalIndex # noqa @@ -13,6 +13,7 @@ __all__ = ['Index', 'MultiIndex', 'NumericIndex', 
'Float64Index', 'Int64Index', 'CategoricalIndex', 'RangeIndex', 'InvalidIndexError', + '_new_Index', '_ensure_index', '_get_na_value', '_get_combined_index', '_get_distinct_indexes', '_union_indexes', '_get_consensus_names', From a1d4d073ce3964813bad971227d77b6527bbcfd4 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 24 Jan 2016 08:13:10 -0800 Subject: [PATCH 5/8] Fix Python 2 issues --- pandas/indexes/range.py | 2 +- pandas/tests/{ => indexes}/data/multiindex_v1.pickle | 0 pandas/tests/indexes/test_base.py | 2 ++ pandas/tests/indexes/test_category.py | 2 ++ 4 files changed, 5 insertions(+), 1 deletion(-) rename pandas/tests/{ => indexes}/data/multiindex_v1.pickle (100%) diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 1b004a0034e7a..f4f5745659002 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -5,7 +5,7 @@ import pandas.index as _index from pandas import compat -from pandas.compat import lrange +from pandas.compat import lrange, range from pandas.indexes.base import Index from pandas.util.decorators import Appender, cache_readonly import pandas.core.common as com diff --git a/pandas/tests/data/multiindex_v1.pickle b/pandas/tests/indexes/data/multiindex_v1.pickle similarity index 100% rename from pandas/tests/data/multiindex_v1.pickle rename to pandas/tests/indexes/data/multiindex_v1.pickle diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9d97b98b5a13e..a0e9a4dadfda5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from datetime import datetime, timedelta # TODO(wesm): fix long line flake8 issues diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 4ec2f37ba15fb..78016c0f0b5f7 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + # TODO(wesm): fix long line flake8 issues # 
flake8: noqa From 9b1028381a36de940793e282136fd64934b1c788 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 24 Jan 2016 10:33:36 -0800 Subject: [PATCH 6/8] Address comments --- pandas/indexes/api.py | 15 ++++++++------- pandas/tests/indexes/common.py | 22 ---------------------- pandas/tests/indexes/test_base.py | 21 +++++++++------------ 3 files changed, 17 insertions(+), 41 deletions(-) diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index c41e30aef1dba..0e1bf8af78e96 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -1,10 +1,11 @@ -from .base import (Index, _new_Index, # noqa - _ensure_index, _get_na_value, - InvalidIndexError) -from .category import CategoricalIndex # noqa -from .multi import MultiIndex # noqa -from .numeric import NumericIndex, Float64Index, Int64Index # noqa -from .range import RangeIndex # noqa +from pandas.indexes.base import (Index, _new_Index, # noqa + _ensure_index, _get_na_value, + InvalidIndexError) +from pandas.indexes.category import CategoricalIndex # noqa +from pandas.indexes.multi import MultiIndex # noqa +from pandas.indexes.numeric import (NumericIndex, Float64Index, # noqa + Int64Index) +from pandas.indexes.range import RangeIndex # noqa import pandas.core.common as com import pandas.lib as lib diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index fbe4fb39e062e..f1824267d63d8 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -14,34 +14,12 @@ import pandas as pd -if PY3: - unicode = lambda x: x - class Base(object): """ base class for index sub-class tests """ _holder = None _compat_props = ['shape', 'ndim', 'size', 'itemsize', 'nbytes'] - # @staticmethod - # def setup_indices(cls, indices): - # # setup the test indices in the self.indices dict - # def make_accessor(x): - # @property - # def accessor(self): - # key = '__cached_{0}'.format(x) - # if hasattr(self, key): - # return getattr(self, key) - # else: - # result = 
self.indices[x].copy(deep=True) - # setattr(self, key, result) - # return result - - # return accessor - - # for name in indices: - # setattr(cls, name, make_accessor(name)) - def setup_indices(self): for name, idx in self.indices.items(): setattr(self, name, idx) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index a0e9a4dadfda5..88e05880722e1 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -29,9 +29,6 @@ import pandas as pd from pandas.lib import Timestamp -if PY3: - unicode = lambda x: x - class TestIndex(Base, tm.TestCase): _holder = Index @@ -1349,7 +1346,7 @@ def test_string_index_repr(self): self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # multiple lines idx = pd.Index(['a', 'bb', 'ccc'] * 10) @@ -1368,7 +1365,7 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # truncated idx = pd.Index(['a', 'bb', 'ccc'] * 100) @@ -1387,7 +1384,7 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object', length=300)""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # short idx = pd.Index([u'あ', u'いい', u'ううう']) @@ -1397,7 +1394,7 @@ def test_string_index_repr(self): else: expected = u"""\ Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1414,7 +1411,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # truncated 
idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1431,7 +1428,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object', length=300)""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # Emable Unicode option ----------------------------------------- with cf.option_context('display.unicode.east_asian_width', True): @@ -1443,7 +1440,7 @@ def test_string_index_repr(self): self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1462,7 +1459,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1483,7 +1480,7 @@ def test_string_index_repr(self): u'いい', u'ううう'], dtype='object', length=300)""" - self.assertEqual(unicode(idx), expected) + self.assertEqual(u(idx), expected) def test_get_combined_index(): From d87eb63085d484886f99e9f2f62795fa70f91c51 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Sun, 24 Jan 2016 10:35:17 -0800 Subject: [PATCH 7/8] Add TODO about private names --- pandas/indexes/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/indexes/api.py b/pandas/indexes/api.py index 0e1bf8af78e96..3f0ee40a6f93d 100644 --- a/pandas/indexes/api.py +++ b/pandas/indexes/api.py @@ -10,7 +10,8 @@ import pandas.core.common as com import pandas.lib as lib - +# TODO: there are many places that rely on these private methods existing in +# pandas.core.index __all__ = ['Index', 'MultiIndex', 'NumericIndex', 'Float64Index', 'Int64Index', 'CategoricalIndex', 'RangeIndex', 'InvalidIndexError', From a2e6ec669ab43a0df9961b042c6d15d957467209 Mon Sep 17 00:00:00 2001 From: 
Wes McKinney Date: Sun, 24 Jan 2016 11:42:08 -0800 Subject: [PATCH 8/8] Fix Python 2 unicode issue --- pandas/tests/indexes/test_base.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 88e05880722e1..735025cfca42e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1339,6 +1339,12 @@ def test_string_index_repr(self): # py3/py2 repr can differ because of "u" prefix # which also affects to displayed element size + # suppress flake8 warnings + if PY3: + coerce = lambda x: x + else: + coerce = unicode + # short idx = pd.Index(['a', 'bb', 'ccc']) if PY3: @@ -1346,7 +1352,7 @@ def test_string_index_repr(self): self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # multiple lines idx = pd.Index(['a', 'bb', 'ccc'] * 10) @@ -1365,7 +1371,7 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object')""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # truncated idx = pd.Index(['a', 'bb', 'ccc'] * 100) @@ -1384,7 +1390,7 @@ def test_string_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object', length=300)""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # short idx = pd.Index([u'あ', u'いい', u'ううう']) @@ -1394,7 +1400,7 @@ def test_string_index_repr(self): else: expected = u"""\ Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1411,7 +1417,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object')""" - 
self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1428,7 +1434,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object', length=300)""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # Emable Unicode option ----------------------------------------- with cf.option_context('display.unicode.east_asian_width', True): @@ -1440,7 +1446,7 @@ def test_string_index_repr(self): self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # multiple lines idx = pd.Index([u'あ', u'いい', u'ううう'] * 10) @@ -1459,7 +1465,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object')""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) # truncated idx = pd.Index([u'あ', u'いい', u'ううう'] * 100) @@ -1480,7 +1486,7 @@ def test_string_index_repr(self): u'いい', u'ううう'], dtype='object', length=300)""" - self.assertEqual(u(idx), expected) + self.assertEqual(coerce(idx), expected) def test_get_combined_index():