Skip to content

TYP: maybe_cast_to_datetime #39959

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,9 @@ def sanitize_array(
subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

else:
# realize e.g. generators
# TODO: non-standard array-likes we can convert to ndarray more efficiently?
data = list(data)
subarr = _try_cast(data, dtype, copy, raise_cast_failure)

subarr = _sanitize_ndim(subarr, data, dtype, index)
Expand Down Expand Up @@ -594,20 +597,29 @@ def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike:
return arr


def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool):
def _try_cast(
arr: Union[list, np.ndarray],
dtype: Optional[DtypeObj],
copy: bool,
raise_cast_failure: bool,
) -> ArrayLike:
"""
Convert input to numpy ndarray and optionally cast to a given dtype.

Parameters
----------
arr : ndarray, list, tuple, iterator (catchall)
arr : ndarray or list
Excludes: ExtensionArray, Series, Index.
dtype : np.dtype, ExtensionDtype or None
copy : bool
If False, don't copy the data if not needed.
raise_cast_failure : bool
If True, and if a dtype is specified, raise errors during casting.
Otherwise an object array is returned.

Returns
-------
np.ndarray or ExtensionArray
"""
# perf shortcut as this is the most common case
if (
Expand Down
59 changes: 31 additions & 28 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1422,7 +1422,7 @@ def maybe_infer_to_datetimelike(
v = np.array(v, copy=False)

# we only care about object dtypes
if not is_object_dtype(v):
if not is_object_dtype(v.dtype):
return value

shape = v.shape
Expand Down Expand Up @@ -1499,7 +1499,9 @@ def try_timedelta(v: np.ndarray) -> np.ndarray:
return value


def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
def maybe_cast_to_datetime(
value: Union[ExtensionArray, np.ndarray, list], dtype: Optional[DtypeObj]
) -> Union[ExtensionArray, np.ndarray, list]:
"""
try to cast the array/value to a datetimelike dtype, converting float
nan to iNaT
Expand Down Expand Up @@ -1563,26 +1565,28 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):

try:
if is_datetime64:
value = to_datetime(value, errors="raise")
dti = to_datetime(value, errors="raise")
# GH 25843: Remove tz information since the dtype
# didn't specify one
if value.tz is not None:
value = value.tz_localize(None)
value = value._values
if dti.tz is not None:
dti = dti.tz_localize(None)
value = dti._values
elif is_datetime64tz:
# The string check can be removed once issue #13712
# is solved. String data that is passed with a
# datetime64tz is assumed to be naive which should
# be localized to the timezone.
is_dt_string = is_string_dtype(value.dtype)
value = to_datetime(value, errors="raise").array
if is_dt_string:
dta = to_datetime(value, errors="raise").array
if dta.tz is not None:
value = dta.astype(dtype, copy=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does an existing test hit this branch?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, this case raises a TypeError, which we catch and then handle later.

elif is_dt_string:
# Strings here are naive, so directly localize
value = value.tz_localize(dtype.tz)
value = dta.tz_localize(dtype.tz)
else:
# Numeric values are UTC at this point,
# so localize and convert
value = value.tz_localize("UTC").tz_convert(dtype.tz)
value = dta.tz_localize("UTC").tz_convert(dtype.tz)
elif is_timedelta64:
value = to_timedelta(value, errors="raise")._values
except OutOfBoundsDatetime:
Expand All @@ -1595,6 +1599,8 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
getattr(value, "dtype", None)
) and not is_datetime64_dtype(dtype):
if is_object_dtype(dtype):
value = cast(np.ndarray, value)

if value.dtype != DT64NS_DTYPE:
value = value.astype(DT64NS_DTYPE)
ints = np.asarray(value).view("i8")
Expand All @@ -1603,25 +1609,20 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):
# we have a non-castable dtype that was passed
raise TypeError(f"Cannot cast datetime64 to {dtype}")

else:

is_array = isinstance(value, np.ndarray)

# catch a datetime/timedelta that is not of ns variety
# and no coercion specified
if is_array and value.dtype.kind in ["M", "m"]:
elif isinstance(value, np.ndarray):
if value.dtype.kind in ["M", "m"]:
# catch a datetime/timedelta that is not of ns variety
# and no coercion specified
value = sanitize_to_nanoseconds(value)

elif value.dtype == object:
value = maybe_infer_to_datetimelike(value)

else:
# only do this if we have an array and the dtype of the array is not
# setup already we are not an integer/object, so don't bother with this
# conversion
elif not (
is_array
and not (
issubclass(value.dtype.type, np.integer) or value.dtype == np.object_
)
):
value = maybe_infer_to_datetimelike(value)
value = maybe_infer_to_datetimelike(value)

return value

Expand Down Expand Up @@ -1835,7 +1836,9 @@ def construct_1d_ndarray_preserving_na(
return subarr


def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False):
def maybe_cast_to_integer_array(
arr: Union[list, np.ndarray], dtype: np.dtype, copy: bool = False
):
"""
Takes any dtype and returns the casted version, raising for when data is
incompatible with integer/unsigned integer dtypes.
Expand All @@ -1844,9 +1847,9 @@ def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False):

Parameters
----------
arr : array-like
arr : np.ndarray or list
The array to cast.
dtype : str, np.dtype
dtype : np.dtype
The integer dtype to cast the array to.
copy: bool, default False
Whether to make a copy of the array before returning.
Expand Down Expand Up @@ -1880,7 +1883,7 @@ def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False):
assert is_integer_dtype(dtype)

try:
if not hasattr(arr, "astype"):
if not isinstance(arr, np.ndarray):
casted = np.array(arr, dtype=dtype, copy=copy)
else:
casted = arr.astype(dtype, copy=copy)
Expand Down