diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1c99ba0b8e412..86fa9083a33cf 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -909,6 +909,8 @@ Timezones - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` with object-dtype containing multiple timezone-aware ``datetime`` objects with heterogeneous timezones to a :class:`DatetimeTZDtype` incorrectly raising (:issue:`32581`) - Bug in :func:`to_datetime` was failing to parse date strings with timezone name when ``format`` was specified with ``%Z`` (:issue:`49748`) - Better error message when passing invalid values to ``ambiguous`` parameter in :meth:`Timestamp.tz_localize` (:issue:`49565`) +- Bug in string parsing incorrectly allowing a :class:`Timestamp` to be constructed with an invalid timezone, which would raise when trying to print (:issue:`50668`) +- Numeric ^^^^^^^ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index dabeab3e30f4d..3bc422942276f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -304,6 +304,19 @@ def parse_datetime_string( raise OutOfBoundsDatetime( f'Parsing "{date_string}" to datetime overflows' ) from err + if dt.tzinfo is not None: + # dateutil can return a datetime with a tzoffset outside of (-24H, 24H) + # bounds, which is invalid (can be constructed, but raises if we call + # str(dt)). Check that and raise here if necessary. 
+        try:
+            dt.utcoffset()
+        except ValueError as err:
+            # offset must be a timedelta strictly between -timedelta(hours=24)
+            # and timedelta(hours=24)
+            raise ValueError(
+                f'Parsed string "{date_string}" gives an invalid tzoffset, '
+                "which must be between -timedelta(hours=24) and timedelta(hours=24)"
+            ) from err
 
     return dt
 
diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py
index aa5a55e6ca39d..8ee92e28b78bf 100644
--- a/pandas/tests/scalar/timestamp/test_constructors.py
+++ b/pandas/tests/scalar/timestamp/test_constructors.py
@@ -24,6 +24,16 @@
 
 
 class TestTimestampConstructors:
+    def test_construct_from_string_invalid_raises(self):
+        # dateutil (weirdly) parses "200622-12-31" as
+        # datetime(2022, 6, 20, 12, 0, tzinfo=tzoffset(None, -111600))
+        # which besides being mis-parsed, is a tzoffset that will cause
+        # str(ts) to raise ValueError. Ensure we raise in the constructor
+        # instead.
+        # see test_to_datetime_malformed_raise for analogous to_datetime test
+        with pytest.raises(ValueError, match="gives an invalid tzoffset"):
+            Timestamp("200622-12-31")
+
     def test_constructor_from_iso8601_str_with_offset_reso(self):
         # GH#49737
         ts = Timestamp("2016-01-01 04:05:06-01:00")
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index dd1012d57d6bc..a6e40c30d5b82 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1561,12 +1561,14 @@ def test_to_datetime_malformed_no_raise(self, errors, expected):
     def test_to_datetime_malformed_raise(self):
         # GH 48633
         ts_strings = ["200622-12-31", "111111-24-11"]
+        msg = (
+            'Parsed string "200622-12-31" gives an invalid tzoffset, which must '
+            r"be between -timedelta\(hours=24\) and timedelta\(hours=24\), "
+            "at position 0"
+        )
         with pytest.raises(
             ValueError,
-            match=(
-                r"^offset must be a timedelta strictly between "
-                r"-timedelta\(hours=24\) and timedelta\(hours=24\)., at position 0$"
-            ),
+ match=msg, ): with tm.assert_produces_warning( UserWarning, match="Could not infer format"