diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5811a8c4c45ff..dde098be2e5ae 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -688,6 +688,7 @@ Other API Changes - :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`) - :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`) - Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) +- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`) .. _whatsnew_0240.deprecations: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 4d611f89bca9c..393c2cdba8568 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -282,11 +282,6 @@ class ApplyTypeError(TypeError): pass -# TODO: unused. remove? -class CacheableOffset(object): - _cacheable = True - - # --------------------------------------------------------------------- # Base Classes @@ -296,8 +291,6 @@ class _BaseOffset(object): and will (after pickle errors are resolved) go into a cdef class. """ _typ = "dateoffset" - _normalize_cache = True - _cacheable = False _day_opt = None _attributes = frozenset(['n', 'normalize']) @@ -386,10 +379,6 @@ class _BaseOffset(object): # that allows us to use methods that can go in a `cdef class` return self * 1 - # TODO: this is never true. 
fix it or get rid of it - def _should_cache(self): - return self.isAnchored() and self._cacheable - def __repr__(self): className = getattr(self, '_outputName', type(self).__name__) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4c75927135b22..6cc4922788cf3 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -13,7 +13,7 @@ resolution as libresolution) from pandas.util._decorators import cache_readonly -from pandas.errors import PerformanceWarning, AbstractMethodError +from pandas.errors import PerformanceWarning from pandas import compat from pandas.core.dtypes.common import ( @@ -268,27 +268,22 @@ def _generate_range(cls, start, end, periods, freq, tz=None, end, end.tz, start.tz, freq, tz ) if freq is not None: - if cls._use_cached_range(freq, _normalized, start, end): - # Currently always False; never hit - # Should be reimplemented as a part of GH#17914 - index = cls._cached_range(start, end, periods=periods, - freq=freq) - else: - index = _generate_regular_range(cls, start, end, periods, freq) - - if tz is not None and getattr(index, 'tz', None) is None: - arr = conversion.tz_localize_to_utc( - ensure_int64(index.values), - tz, ambiguous=ambiguous) - - index = cls(arr) - - # index is localized datetime64 array -> have to convert - # start/end as well to compare - if start is not None: - start = start.tz_localize(tz).asm8 - if end is not None: - end = end.tz_localize(tz).asm8 + # TODO: consider re-implementing _cached_range; GH#17914 + index = _generate_regular_range(cls, start, end, periods, freq) + + if tz is not None and getattr(index, 'tz', None) is None: + arr = conversion.tz_localize_to_utc( + ensure_int64(index.values), + tz, ambiguous=ambiguous) + + index = cls(arr) + + # index is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz).asm8 + if end is not None: + end = end.tz_localize(tz).asm8 else: # 
Create a linearly spaced date_range in local time arr = np.linspace(start.value, end.value, periods) @@ -303,16 +298,6 @@ def _generate_range(cls, start, end, periods, freq, tz=None, return cls._simple_new(index.values, freq=freq, tz=tz) - @classmethod - def _use_cached_range(cls, freq, _normalized, start, end): - # DatetimeArray is mutable, so is not cached - return False - - @classmethod - def _cached_range(cls, start=None, end=None, - periods=None, freq=None, **kwargs): - raise AbstractMethodError(cls) - # ----------------------------------------------------------------- # Descriptive Properties diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 70140d2d9a432..e0219acc115b5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -40,7 +40,7 @@ DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, wrap_field_accessor, wrap_array_method) from pandas.tseries.offsets import ( - generate_range, CDay, prefix_mapping) + CDay, prefix_mapping) from pandas.core.tools.timedeltas import to_timedelta from pandas.util._decorators import Appender, cache_readonly, Substitution @@ -326,13 +326,6 @@ def _generate_range(cls, start, end, periods, name=None, freq=None, out.name = name return out - @classmethod - def _use_cached_range(cls, freq, _normalized, start, end): - # Note: This always returns False - return (freq._should_cache() and - not (freq._normalize_cache and not _normalized) and - _naive_in_cache_range(start, end)) - def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ if self._has_same_tz(value): @@ -410,71 +403,6 @@ def nbytes(self): # for TZ-aware return self._ndarray_values.nbytes - @classmethod - def _cached_range(cls, start=None, end=None, periods=None, freq=None, - name=None): - if start is None and end is None: - # I somewhat believe this should never be raised externally - raise TypeError('Must specify either start or end.') - if start is not None: - start = 
Timestamp(start) - if end is not None: - end = Timestamp(end) - if (start is None or end is None) and periods is None: - raise TypeError( - 'Must either specify period or provide both start and end.') - - if freq is None: - # This can't happen with external-facing code - raise TypeError('Must provide freq.') - - drc = _daterange_cache - if freq not in _daterange_cache: - xdr = generate_range(offset=freq, start=_CACHE_START, - end=_CACHE_END) - - arr = tools.to_datetime(list(xdr), box=False) - - cachedRange = DatetimeIndex._simple_new(arr) - cachedRange.freq = freq - cachedRange = cachedRange.tz_localize(None) - cachedRange.name = None - drc[freq] = cachedRange - else: - cachedRange = drc[freq] - - if start is None: - if not isinstance(end, Timestamp): - raise AssertionError('end must be an instance of Timestamp') - - end = freq.rollback(end) - - endLoc = cachedRange.get_loc(end) + 1 - startLoc = endLoc - periods - elif end is None: - if not isinstance(start, Timestamp): - raise AssertionError('start must be an instance of Timestamp') - - start = freq.rollforward(start) - - startLoc = cachedRange.get_loc(start) - endLoc = startLoc + periods - else: - if not freq.onOffset(start): - start = freq.rollforward(start) - - if not freq.onOffset(end): - end = freq.rollback(end) - - startLoc = cachedRange.get_loc(start) - endLoc = cachedRange.get_loc(end) + 1 - - indexSlice = cachedRange[startLoc:endLoc] - indexSlice.name = name - indexSlice.freq = freq - - return indexSlice - def _mpl_repr(self): # how to represent ourselves to matplotlib return libts.ints_to_pydatetime(self.asi8, self.tz) @@ -832,22 +760,19 @@ def _fast_union(self, other): else: left, right = other, self - left_start, left_end = left[0], left[-1] + left_end = left[-1] right_end = right[-1] - if not self.freq._should_cache(): - # concatenate dates - if left_end < right_end: - loc = right.searchsorted(left_end, side='right') - right_chunk = right.values[loc:] - dates = _concat._concat_compat((left.values, 
right_chunk)) - return self._shallow_copy(dates) - else: - return left + # TODO: consider re-implementing freq._should_cache for fastpath + + # concatenate dates + if left_end < right_end: + loc = right.searchsorted(left_end, side='right') + right_chunk = right.values[loc:] + dates = _concat._concat_compat((left.values, right_chunk)) + return self._shallow_copy(dates) else: - return type(self)(start=left_start, - end=max(left_end, right_end), - freq=left.freq) + return left def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None @@ -1724,21 +1649,6 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, closed=closed, **kwargs) -_CACHE_START = Timestamp(datetime(1950, 1, 1)) -_CACHE_END = Timestamp(datetime(2030, 1, 1)) - -_daterange_cache = {} - - -def _naive_in_cache_range(start, end): - if start is None or end is None: - return False - else: - if start.tzinfo is not None or end.tzinfo is not None: - return False - return start > _CACHE_START and end < _CACHE_END - - def _time_to_micros(time): seconds = time.hour * 60 * 60 + 60 * time.minute + time.second return 1000000 * seconds + time.microsecond diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index e0caf671fc390..7481c4a710083 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -616,23 +616,6 @@ def test_naive_aware_conflicts(self): with tm.assert_raises_regex(TypeError, msg): aware.join(naive) - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, freq=BDay()) - DatetimeIndex._cached_range(START, periods=20, freq=BDay()) - DatetimeIndex._cached_range(end=START, periods=20, freq=BDay()) - - with tm.assert_raises_regex(TypeError, "freq"): - DatetimeIndex._cached_range(START, END) - - with tm.assert_raises_regex(TypeError, "specify period"): - DatetimeIndex._cached_range(START, freq=BDay()) - - with 
tm.assert_raises_regex(TypeError, "specify period"): - DatetimeIndex._cached_range(end=END, freq=BDay()) - - with tm.assert_raises_regex(TypeError, "start or end"): - DatetimeIndex._cached_range(periods=20, freq=BDay()) - def test_misc(self): end = datetime(2009, 5, 13) dr = bdate_range(end=end, periods=20) @@ -693,29 +676,6 @@ def test_constructor(self): with tm.assert_raises_regex(TypeError, msg): bdate_range('2011-1-1', '2012-1-1', 'C') - def test_cached_range(self): - DatetimeIndex._cached_range(START, END, freq=CDay()) - DatetimeIndex._cached_range(START, periods=20, - freq=CDay()) - DatetimeIndex._cached_range(end=START, periods=20, - freq=CDay()) - - # with pytest.raises(TypeError): - with tm.assert_raises_regex(TypeError, "freq"): - DatetimeIndex._cached_range(START, END) - - # with pytest.raises(TypeError): - with tm.assert_raises_regex(TypeError, "specify period"): - DatetimeIndex._cached_range(START, freq=CDay()) - - # with pytest.raises(TypeError): - with tm.assert_raises_regex(TypeError, "specify period"): - DatetimeIndex._cached_range(end=END, freq=CDay()) - - # with pytest.raises(TypeError): - with tm.assert_raises_regex(TypeError, "start or end"): - DatetimeIndex._cached_range(periods=20, freq=CDay()) - def test_misc(self): end = datetime(2009, 5, 13) dr = bdate_range(end=end, periods=20, freq='C') diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index bda4d71d58e82..a0cff6f74b979 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -15,11 +15,9 @@ from pandas._libs.tslibs.frequencies import (get_freq_code, get_freq_str, INVALID_FREQ_ERR_MSG) from pandas.tseries.frequencies import _offset_map, get_offset -from pandas.core.indexes.datetimes import ( - _to_m8, DatetimeIndex, _daterange_cache) +from pandas.core.indexes.datetimes import _to_m8, DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex import 
pandas._libs.tslibs.offsets as liboffsets -from pandas._libs.tslibs.offsets import CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, BusinessHour, WeekOfMonth, CBMonthEnd, CustomBusinessHour, @@ -28,7 +26,7 @@ BYearBegin, QuarterBegin, BQuarterBegin, BMonthBegin, DateOffset, Week, YearBegin, YearEnd, Day, - QuarterEnd, BusinessMonthEnd, FY5253, + QuarterEnd, FY5253, Nano, Easter, FY5253Quarter, LastWeekOfMonth, Tick, CalendarDay) import pandas.tseries.offsets as offsets @@ -2830,70 +2828,6 @@ def test_freq_offsets(): assert (off.freqstr == 'B-30Min') -def get_all_subclasses(cls): - ret = set() - this_subclasses = cls.__subclasses__() - ret = ret | set(this_subclasses) - for this_subclass in this_subclasses: - ret | get_all_subclasses(this_subclass) - return ret - - -class TestCaching(object): - - # as of GH 6479 (in 0.14.0), offset caching is turned off - # as of v0.12.0 only BusinessMonth/Quarter were actually caching - - def setup_method(self, method): - _daterange_cache.clear() - _offset_map.clear() - - def run_X_index_creation(self, cls): - inst1 = cls() - if not inst1.isAnchored(): - assert not inst1._should_cache(), cls - return - - assert inst1._should_cache(), cls - - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), - freq=inst1, normalize=True) - assert cls() in _daterange_cache, cls - - def test_should_cache_month_end(self): - assert not MonthEnd()._should_cache() - - def test_should_cache_bmonth_end(self): - assert not BusinessMonthEnd()._should_cache() - - def test_should_cache_week_month(self): - assert not WeekOfMonth(weekday=1, week=2)._should_cache() - - def test_all_cacheableoffsets(self): - for subclass in get_all_subclasses(CacheableOffset): - if subclass.__name__[0] == "_" \ - or subclass in TestCaching.no_simple_ctr: - continue - self.run_X_index_creation(subclass) - - def test_month_end_index_creation(self): - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 31), - 
freq=MonthEnd(), normalize=True) - assert not MonthEnd() in _daterange_cache - - def test_bmonth_end_index_creation(self): - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), - freq=BusinessMonthEnd(), normalize=True) - assert not BusinessMonthEnd() in _daterange_cache - - def test_week_of_month_index_creation(self): - inst1 = WeekOfMonth(weekday=1, week=2) - DatetimeIndex(start=datetime(2013, 1, 31), end=datetime(2013, 3, 29), - freq=inst1, normalize=True) - inst2 = WeekOfMonth(weekday=1, week=2) - assert inst2 not in _daterange_cache - - class TestReprNames(object): def test_str_for_named_is_name(self): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 0a9931c46bbd5..e6d73fc45c502 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -806,7 +806,6 @@ class CustomBusinessDay(_CustomMixin, BusinessDay): passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar """ - _cacheable = False _prefix = 'C' _attributes = frozenset(['n', 'normalize', 'weekmask', 'holidays', 'calendar', 'offset']) @@ -958,7 +957,6 @@ class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): passed to ``numpy.busdaycalendar`` calendar : pd.HolidayCalendar or np.busdaycalendar """ - _cacheable = False _attributes = frozenset(['n', 'normalize', 'weekmask', 'holidays', 'calendar', 'offset'])