From 767cc432968fef8dc10cdb6db3aa13c3ce039df9 Mon Sep 17 00:00:00 2001 From: "snitish.iitk@gmail.com" Date: Fri, 28 Feb 2025 21:19:05 -0600 Subject: [PATCH 1/2] BUG: Fix bug in to_datetime that occasionally throws FloatingPointError when called on a float array with missing values --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/_libs/tslibs/conversion.pyx | 1 + pandas/tests/tools/test_to_datetime.py | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b5bd3216dd990..78dfea820481a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -649,6 +649,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`) - Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`) - Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`) +- Bug in :meth:`to_datetime` on float array with missing values throwing ``FloatingPointError`` (:issue:`58419`) - Bug in :meth:`to_datetime` on float32 df with year, month, day etc. columns leads to precision issues and incorrect result. (:issue:`60506`) - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`) - Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7a8b4df447aee..0e065691c8a13 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -142,6 +142,7 @@ def cast_from_unit_vectorized( for i in range(len(values)): if is_nan(values[i]): base[i] = NPY_NAT + frac[i] = 0 else: base[i] = values[i] frac[i] = values[i] - base[i] diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index e039f54960389..616ae36c989be 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2520,6 +2520,15 @@ def test_to_datetime_overflow(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) + def test_to_datetime_float_with_nans_floating_point_error(self): + # GH#58419 + ser = Series([np.nan] * 1000 + [1712219033.0], dtype=np.float64) + result = to_datetime(ser, unit="s", errors="coerce") + expected = Series( + [NaT] * 1000 + [Timestamp("2024-04-04 08:23:53")], dtype="datetime64[ns]" + ) + tm.assert_series_equal(result, expected) + def test_string_invalid_operation(self, cache): invalid = np.array(["87156549591102612381000001219H5"], dtype=object) # GH #51084 From d2dc2048cb4178f2881551b888f01efacb67862b Mon Sep 17 00:00:00 2001 From: "snitish.iitk@gmail.com" Date: Sat, 1 Mar 2025 15:43:31 -0600 Subject: [PATCH 2/2] Address review comment --- pandas/_libs/tslibs/conversion.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0e065691c8a13..c4acf72ab87d8 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -137,12 +137,11 @@ def cast_from_unit_vectorized( out = np.empty(shape, dtype="i8") base = np.empty(shape, dtype="i8") - frac = np.empty(shape, dtype="f8") + frac = np.zeros(shape, dtype="f8") for i in range(len(values)): if is_nan(values[i]): base[i] = NPY_NAT - frac[i] = 0 else: base[i] = values[i] frac[i] = values[i] - base[i]