From 9ae8c966c27c37e4a31ad29506dbe83f0a3bc530 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 30 Jan 2020 16:18:56 -0800 Subject: [PATCH 1/4] BUG: Period[us] start_time off by 1 nanosecond --- pandas/_libs/tslibs/period.pyx | 8 ++++++++ pandas/tests/scalar/period/test_asfreq.py | 13 ++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 3dd560ece188d..c7f6bc40f55c1 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1182,10 +1182,18 @@ cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1: cdef: npy_datetimestruct dts + int64_t value if ordinal == NPY_NAT: return NPY_NAT + if freq == 11000: + # Microsecond, avoid get_date_info to prevent floating point errors + value = ordinal * 1000 + dt64_to_dtstruct(value, &dts) + check_dts_bounds(&dts) + return value + get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) return dtstruct_to_dt64(&dts) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 357274e724c68..0606b93db7eeb 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -3,7 +3,7 @@ from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG, _period_code_map from pandas.errors import OutOfBoundsDatetime -from pandas import Period, offsets +from pandas import Period, Timestamp, offsets class TestFreqConversion: @@ -656,6 +656,17 @@ def test_conv_secondly(self): assert ival_S.asfreq("S") == ival_S + def test_conv_microsecond(self): + # Avoid floating point errors dropping the start_time to before + # the beginning of the Period + per = Period("2020-01-30 15:57:27.576166", freq="U") + assert per.ordinal == 1580399847576166 + + start = per.start_time + expected = Timestamp("2020-01-30 15:57:27.576166") + assert start == expected + assert 
start.value == per.ordinal * 1000 + def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq="A", year=2007) From 430fdcf8c81478093611ce8f71b88d864789109a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 30 Jan 2020 17:01:06 -0800 Subject: [PATCH 2/4] catch overflows --- pandas/_libs/tslibs/period.pyx | 9 ++++----- pandas/tests/scalar/period/test_asfreq.py | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c7f6bc40f55c1..7e2391a0a9e8c 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -22,7 +22,7 @@ PyDateTime_IMPORT from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct, pandas_datetime_to_datetimestruct, check_dts_bounds, - NPY_DATETIMEUNIT, NPY_FR_D) + NPY_DATETIMEUNIT, NPY_FR_D, NPY_FR_us) cdef extern from "src/datetime/np_datetime.h": int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, @@ -1182,17 +1182,16 @@ cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: cdef: npy_datetimestruct dts - int64_t value if ordinal == NPY_NAT: return NPY_NAT if freq == 11000: # Microsecond, avoid get_date_info to prevent floating point errors - value = ordinal * 1000 - dt64_to_dtstruct(value, &dts) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts) check_dts_bounds(&dts) - return value + # Equivalent: return ordinal * 1000 + return dtstruct_to_dt64(&dts) get_date_info(ordinal, freq, &dts) check_dts_bounds(&dts) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 0606b93db7eeb..0da2853ee7d2c 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -667,6 +667,12 @@ def test_conv_microsecond(self): assert start == expected assert start.value == per.ordinal * 1000 + per2 = Period("2300-01-01", "us") + with pytest.raises(OutOfBoundsDatetime, match="2300-01-01"): + per2.start_time + with pytest.raises(OutOfBoundsDatetime, match="2300-01-01"): + per2.end_time + def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq="A", year=2007) From 949054545c595b0f2ffb9782ad2fec362db8d6c3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 08:13:28 -0800 Subject: [PATCH 3/4] GH ref, whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/scalar/period/test_asfreq.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e07a8fa0469f4..7db306d7ff82b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -105,6 +105,7 @@ Datetimelike - Bug in :class:`Timestamp` where constructing :class:`Timestamp` from ambiguous epoch time and calling constructor again changed :meth:`Timestamp.value` property (:issue:`24329`) - :meth:`DatetimeArray.searchsorted`, :meth:`TimedeltaArray.searchsorted`, :meth:`PeriodArray.searchsorted` not recognizing non-pandas scalars and incorrectly raising ``ValueError`` 
instead of ``TypeError`` (:issue:`30950`) - Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`) +- Bug in :meth:`Period.to_timestamp`, :attr:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 0da2853ee7d2c..436810042186a 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -657,8 +657,8 @@ def test_conv_secondly(self): assert ival_S.asfreq("S") == ival_S def test_conv_microsecond(self): - # Avoid floating point errors dropping the start_time to before - # the beginning of the Period + # GH#31475 Avoid floating point errors dropping the start_time to + # before the beginning of the Period per = Period("2020-01-30 15:57:27.576166", freq="U") assert per.ordinal == 1580399847576166 From 85e47a0fdf777467a6889b202bf7007868cf7bd0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 31 Jan 2020 13:08:06 -0800 Subject: [PATCH 4/4] REF: make DTI._parsed_string_to_bounds work like PI implementation --- pandas/core/indexes/datetimes.py | 63 ++++---------------------------- 1 file changed, 8 insertions(+), 55 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2b4636155111f..6f5f9e0d5783a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -5,15 +5,8 @@ import numpy as np -from pandas._libs import ( - NaT, - Timedelta, - Timestamp, - index as libindex, - lib, - tslib as libts, -) -from pandas._libs.tslibs import ccalendar, fields, parsing, timezones +from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts +from pandas._libs.tslibs import
fields, parsing, timezones from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar @@ -476,7 +469,7 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): Parameters ---------- - reso : Resolution + reso : str Resolution provided by parsed string. parsed : datetime Datetime from parsed string. @@ -484,7 +477,6 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): Returns ------- lower, upper: pd.Timestamp - """ valid_resos = { "year", @@ -500,50 +492,11 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): } if reso not in valid_resos: raise KeyError - if reso == "year": - start = Timestamp(parsed.year, 1, 1) - end = Timestamp(parsed.year + 1, 1, 1) - Timedelta(nanoseconds=1) - elif reso == "month": - d = ccalendar.get_days_in_month(parsed.year, parsed.month) - start = Timestamp(parsed.year, parsed.month, 1) - end = start + Timedelta(days=d, nanoseconds=-1) - elif reso == "quarter": - qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead - d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month - start = Timestamp(parsed.year, parsed.month, 1) - end = Timestamp(parsed.year, qe, 1) + Timedelta(days=d, nanoseconds=-1) - elif reso == "day": - start = Timestamp(parsed.year, parsed.month, parsed.day) - end = start + Timedelta(days=1, nanoseconds=-1) - elif reso == "hour": - start = Timestamp(parsed.year, parsed.month, parsed.day, parsed.hour) - end = start + Timedelta(hours=1, nanoseconds=-1) - elif reso == "minute": - start = Timestamp( - parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute - ) - end = start + Timedelta(minutes=1, nanoseconds=-1) - elif reso == "second": - start = Timestamp( - parsed.year, - parsed.month, - parsed.day, - parsed.hour, - parsed.minute, - parsed.second, - ) - end = start + Timedelta(seconds=1, nanoseconds=-1) - elif reso == "microsecond": - start = Timestamp( - parsed.year, - parsed.month, - 
parsed.day, - parsed.hour, - parsed.minute, - parsed.second, - parsed.microsecond, - ) - end = start + Timedelta(microseconds=1, nanoseconds=-1) + + grp = Resolution.get_freq_group(reso) + per = Period(parsed, freq=(grp, 1)) + start, end = per.start_time, per.end_time + # GH 24076 # If an incoming date string contained a UTC offset, need to localize # the parsed date to this offset first before aligning with the index's