diff --git a/pandas/core/construction.py b/pandas/core/construction.py index dd75473da6d78..1172fd25de3e4 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -521,6 +521,9 @@ def sanitize_array( subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) else: + # realize e.g. generators + # TODO: non-standard array-likes we can convert to ndarray more efficiently? + data = list(data) subarr = _try_cast(data, dtype, copy, raise_cast_failure) subarr = _sanitize_ndim(subarr, data, dtype, index) @@ -594,13 +597,18 @@ def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike: return arr -def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool): +def _try_cast( + arr: Union[list, np.ndarray], + dtype: Optional[DtypeObj], + copy: bool, + raise_cast_failure: bool, +) -> ArrayLike: """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- - arr : ndarray, list, tuple, iterator (catchall) + arr : ndarray or list Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool @@ -608,6 +616,10 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo raise_cast_failure : bool If True, and if a dtype is specified, raise errors during casting. Otherwise an object array is returned. + + Returns + ------- + np.ndarray or ExtensionArray """ # perf shortcut as this is the most common case if ( diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3a6830467ab50..531d784925e9d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1422,7 +1422,7 @@ def maybe_infer_to_datetimelike( v = np.array(v, copy=False) # we only care about object dtypes - if not is_object_dtype(v): + if not is_object_dtype(v.dtype): return value shape = v.shape @@ -1499,7 +1499,9 @@ def try_timedelta(v: np.ndarray) -> np.ndarray: return value -def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): +def maybe_cast_to_datetime( + value: Union[ExtensionArray, np.ndarray, list], dtype: Optional[DtypeObj] +) -> Union[ExtensionArray, np.ndarray, list]: """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT @@ -1563,26 +1565,28 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): try: if is_datetime64: - value = to_datetime(value, errors="raise") + dti = to_datetime(value, errors="raise") # GH 25843: Remove tz information since the dtype # didn't specify one - if value.tz is not None: - value = value.tz_localize(None) - value = value._values + if dti.tz is not None: + dti = dti.tz_localize(None) + value = dti._values elif is_datetime64tz: # The string check can be removed once issue #13712 # is solved. String data that is passed with a # datetime64tz is assumed to be naive which should # be localized to the timezone. is_dt_string = is_string_dtype(value.dtype) - value = to_datetime(value, errors="raise").array - if is_dt_string: + dta = to_datetime(value, errors="raise").array + if dta.tz is not None: + value = dta.astype(dtype, copy=False) + elif is_dt_string: # Strings here are naive, so directly localize - value = value.tz_localize(dtype.tz) + value = dta.tz_localize(dtype.tz) else: # Numeric values are UTC at this point, # so localize and convert - value = value.tz_localize("UTC").tz_convert(dtype.tz) + value = dta.tz_localize("UTC").tz_convert(dtype.tz) elif is_timedelta64: value = to_timedelta(value, errors="raise")._values except OutOfBoundsDatetime: @@ -1595,6 +1599,8 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): getattr(value, "dtype", None) ) and not is_datetime64_dtype(dtype): if is_object_dtype(dtype): + value = cast(np.ndarray, value) + if value.dtype != DT64NS_DTYPE: value = value.astype(DT64NS_DTYPE) ints = np.asarray(value).view("i8") @@ -1603,25 +1609,20 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): # we have a non-castable dtype that was passed raise TypeError(f"Cannot cast datetime64 to {dtype}") - else: - - is_array = isinstance(value, np.ndarray) - - # catch a datetime/timedelta that is not of ns variety - # and no coercion specified - if is_array and value.dtype.kind in ["M", "m"]: + elif isinstance(value, np.ndarray): + if value.dtype.kind in ["M", "m"]: + # catch a datetime/timedelta that is not of ns variety + # and no coercion specified value = sanitize_to_nanoseconds(value) + elif value.dtype == object: + value = maybe_infer_to_datetimelike(value) + + else: # only do this if we have an array and the dtype of the array is not # setup already we are not an integer/object, so don't bother with this # conversion - elif not ( - is_array - and not ( - issubclass(value.dtype.type, np.integer) or value.dtype == np.object_ - ) - ): - value = maybe_infer_to_datetimelike(value) + value = maybe_infer_to_datetimelike(value) return value @@ -1835,7 +1836,9 @@ def construct_1d_ndarray_preserving_na( return subarr -def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False): +def maybe_cast_to_integer_array( + arr: Union[list, np.ndarray], dtype: np.dtype, copy: bool = False +): """ Takes any dtype and returns the casted version, raising for when data is incompatible with integer/unsigned integer dtypes. @@ -1844,9 +1847,9 @@ def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False): Parameters ---------- - arr : array-like + arr : np.ndarray or list The array to cast. - dtype : str, np.dtype + dtype : np.dtype The integer dtype to cast the array to. copy: bool, default False Whether to make a copy of the array before returning. @@ -1880,7 +1883,7 @@ def maybe_cast_to_integer_array(arr, dtype: Dtype, copy: bool = False): assert is_integer_dtype(dtype) try: - if not hasattr(arr, "astype"): + if not isinstance(arr, np.ndarray): casted = np.array(arr, dtype=dtype, copy=copy) else: casted = arr.astype(dtype, copy=copy)