diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5ff1ea9d194f6..d3914ac451c6e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -454,6 +454,7 @@ Other Deprecations - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`) - Deprecated behavior of :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupBy.groups`, in a future version ``groups`` by one element list will return tuple instead of scalar. (:issue:`58858`) - Deprecated behavior of :meth:`Series.dt.to_pytimedelta`, in a future version this will return a :class:`Series` containing python ``datetime.timedelta`` objects instead of an ``ndarray`` of timedelta; this matches the behavior of other :meth:`Series.dt` properties. (:issue:`57463`) +- Deprecated converting object-dtype columns of ``datetime.datetime`` objects to datetime64 when writing to stata (:issue:`56536`) - Deprecated lowercase strings ``d``, ``b`` and ``c`` denoting frequencies in :class:`Day`, :class:`BusinessDay` and :class:`CustomBusinessDay` in favour of ``D``, ``B`` and ``C`` (:issue:`58998`) - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) - Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 092c24f0d31c3..08177e76ee237 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -393,14 +393,22 @@ def parse_dates_safe( d["days"] = np.asarray(diff).astype("m8[D]").view("int64") elif infer_dtype(dates, skipna=False) == "datetime": + warnings.warn( + # GH#56536 + "Converting object-dtype columns of datetimes to datetime64 when " + "writing to stata is deprecated. Call " + "`df=df.infer_objects(copy=False)` before writing to stata instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) if delta: delta = dates._values - stata_epoch def f(x: timedelta) -> float: - return US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds + return US_PER_DAY * x.days + 1_000_000 * x.seconds + x.microseconds v = np.vectorize(f) - d["delta"] = v(delta) + d["delta"] = v(delta) // 1_000 # convert back to ms if year: year_month = dates.apply(lambda x: 100 * x.year + x.month) d["year"] = year_month._values // 100 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b155c0cca4aa6..90fda2c10962b 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1030,7 +1030,13 @@ def test_big_dates(self, datapath, temp_file): # {c : c[-2:] for c in columns} path = temp_file expected.index.name = "index" - expected.to_stata(path, convert_dates=date_conversion) + msg = ( + "Converting object-dtype columns of datetimes to datetime64 " + "when writing to stata is deprecated" + ) + exp_object = expected.astype(object) + with tm.assert_produces_warning(FutureWarning, match=msg): + exp_object.to_stata(path, convert_dates=date_conversion) written_and_read_again = self.read_dta(path) tm.assert_frame_equal(