diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7823f74b7a153..4213cc8e6cfcf 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -360,6 +360,7 @@ Datetimelike - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`) - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`) - Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`) +- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 203308b4f0dee..be087e19ce7b6 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -539,7 +539,7 @@ def _unbox_scalar(self, value) -> np.datetime64: if value is NaT: return np.datetime64(value._value, self.unit) else: - return value.as_unit(self.unit).asm8 + return value.as_unit(self.unit, round_ok=False).asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: return Timestamp(value, tz=self.tz) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 6eb4d234b349d..ff43f97161136 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -322,7 +322,7 @@ def _unbox_scalar(self, value) -> np.timedelta64: if value is NaT: return np.timedelta64(value._value, self.unit) else: - return value.as_unit(self.unit).asm8 + return value.as_unit(self.unit, round_ok=False).asm8 def _scalar_from_string(self, value) -> Timedelta | NaTType: return Timedelta(value) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7122de745e13b..6d5f32774f485 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -515,6 +515,8 @@ def _parsed_string_to_bounds( freq = OFFSET_TO_PERIOD_FREQSTR.get(reso.attr_abbrev, reso.attr_abbrev) per = Period(parsed, freq=freq) start, end = per.start_time, per.end_time + start = start.as_unit(self.unit) + end = end.as_unit(self.unit) # GH 24076 # If an incoming date string contained a UTC offset, need to localize diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1b72c164f7945..28d3292a1c65b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -38,7 +38,10 @@ Shape, npt, ) -from pandas.errors import AbstractMethodError +from pandas.errors import ( + AbstractMethodError, + OutOfBoundsDatetime, +) from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -478,7 +481,17 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: f"{self.values.dtype}. Please report a bug at " "https://github.com/pandas-dev/pandas/issues." ) - return self.astype(new_dtype) + try: + return self.astype(new_dtype) + except OutOfBoundsDatetime as err: + # e.g. GH#56419 if self.dtype is a low-resolution dt64 and we try to + # upcast to a higher-resolution dt64, we may have entries that are + # out of bounds for the higher resolution. + # Re-raise with a more informative message. + raise OutOfBoundsDatetime( + f"Incompatible (high-resolution) value for dtype='{self.dtype}'. " + "Explicitly cast before operating." + ) from err @final def convert(self) -> list[Block]: diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 99535f273075c..7a2a4892f61fb 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1467,6 +1467,39 @@ def test_slice_key(self, obj, key, expected, warn, val, indexer_sli, is_inplace) raise AssertionError("xfail not relevant for this test.") +@pytest.mark.parametrize( + "exp_dtype", + [ + "M8[ms]", + "M8[ms, UTC]", + "m8[ms]", + ], +) +class TestCoercionDatetime64HigherReso(CoercionTest): + @pytest.fixture + def obj(self, exp_dtype): + idx = date_range("2011-01-01", freq="D", periods=4, unit="s") + if exp_dtype == "m8[ms]": + idx = idx - Timestamp("1970-01-01") + assert idx.dtype == "m8[s]" + elif exp_dtype == "M8[ms, UTC]": + idx = idx.tz_localize("UTC") + return Series(idx) + + @pytest.fixture + def val(self, exp_dtype): + ts = Timestamp("2011-01-02 03:04:05.678").as_unit("ms") + if exp_dtype == "m8[ms]": + return ts - Timestamp("1970-01-01") + elif exp_dtype == "M8[ms, UTC]": + return ts.tz_localize("UTC") + return ts + + @pytest.fixture + def warn(self): + return FutureWarning + + @pytest.mark.parametrize( "val,exp_dtype,warn", [ diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 75b4050c18afe..8ed422fc118dc 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import OutOfBoundsDatetime + import pandas as pd from pandas import ( Series, @@ -131,12 +133,30 @@ def test_clip_with_datetimes(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [object, "M8[us]"]) - def test_clip_with_timestamps_and_oob_datetimes(self, dtype): + def test_clip_with_timestamps_and_oob_datetimes_object(self): # GH-42794 - ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype) + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=object) result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) - expected = Series([Timestamp.min, Timestamp.max], dtype=dtype) + expected = Series([Timestamp.min, Timestamp.max], dtype=object) + + tm.assert_series_equal(result, expected) + + def test_clip_with_timestamps_and_oob_datetimes_non_nano(self): + # GH#56410 + dtype = "M8[us]" + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype) + + msg = ( + r"Incompatible \(high-resolution\) value for dtype='datetime64\[us\]'. " + "Explicitly cast before operating" + ) + with pytest.raises(OutOfBoundsDatetime, match=msg): + ser.clip(lower=Timestamp.min, upper=Timestamp.max) + + lower = Timestamp.min.as_unit("us") + upper = Timestamp.max.as_unit("us") + result = ser.clip(lower=lower, upper=upper) + expected = Series([lower, upper], dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 0965d36e4827d..592dba253532d 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -308,12 +308,7 @@ def test_datetime64_fillna(self): "scalar", [ False, - pytest.param( - True, - marks=pytest.mark.xfail( - reason="GH#56410 scalar case not yet addressed" - ), - ), + True, ], ) @pytest.mark.parametrize("tz", [None, "UTC"]) @@ -342,12 +337,7 @@ def test_datetime64_fillna_mismatched_reso_no_rounding(self, tz, scalar): "scalar", [ False, - pytest.param( - True, - marks=pytest.mark.xfail( - reason="GH#56410 scalar case not yet addressed" - ), - ), + True, ], ) def test_timedelta64_fillna_mismatched_reso_no_rounding(self, scalar):