
POC: consistent NaN treatment for pyarrow dtypes #61732


Draft: wants to merge 15 commits into main
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -706,6 +706,8 @@ Datetimelike
- Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
- Bug in :meth:`to_datetime` with ``format="ISO8601"`` and ``utc=True`` where naive timestamps incorrectly inherited timezone offset from previous timestamps in a series. (:issue:`61389`)
- Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
- Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

Timedelta
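A minimal repro sketch for the ``Decimal("NaN")`` entry above (GH#61773). The dtype string is standard ArrowDtype syntax; the expectation that construction now raises is inferred from the whatsnew wording ("incorrectly allowing"), not from released pandas:

```python
# Sketch for GH#61773: Decimal("NaN") should no longer slip through as a
# null when constructing a pyarrow timestamp array.
from decimal import Decimal

import pandas as pd

pd.array([Decimal("NaN")], dtype="timestamp[ns][pyarrow]")
# expected under this PR: raises (previously produced a null silently)
```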
5 changes: 5 additions & 0 deletions pandas/_config/__init__.py
@@ -33,3 +33,8 @@
def using_string_dtype() -> bool:
_mode_options = _global_config["future"]
return _mode_options["infer_string"]


def using_pyarrow_strict_nans() -> bool:
_mode_options = _global_config["mode"]
return _mode_options["pyarrow_strict_nans"]
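A usage sketch for the new accessor. This assumes the option itself is registered as ``mode.pyarrow_strict_nans`` elsewhere in the PR; the registration is not part of this hunk:

```python
import pandas as pd

from pandas._config import using_pyarrow_strict_nans

# The helper just reads the "mode" config namespace shown above.
with pd.option_context("mode.pyarrow_strict_nans", True):
    assert using_pyarrow_strict_nans()
assert not using_pyarrow_strict_nans()  # default assumed to be False
```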
1 change: 1 addition & 0 deletions pandas/_libs/missing.pyi
@@ -14,3 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
def checknull(val: object) -> bool: ...
def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
def is_pdna_or_none(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
18 changes: 18 additions & 0 deletions pandas/_libs/missing.pyx
@@ -249,6 +249,24 @@ cdef bint checknull_with_nat_and_na(object obj):
return checknull_with_nat(obj) or obj is C_NA


@cython.wraparound(False)
@cython.boundscheck(False)
def is_pdna_or_none(values: ndarray) -> ndarray:
# True where the element is pd.NA or None; NaN and NaT are deliberately
# excluded so they can be treated as values rather than as missing
cdef:
ndarray[uint8_t] result
Py_ssize_t i, N
object val

N = len(values)
result = np.zeros(N, dtype=np.uint8)

for i in range(N):
val = values[i]
if val is None or val is C_NA:
result[i] = True
return result.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def is_numeric_na(values: ndarray) -> ndarray:
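Illustrative behavior of the new helper (internal API, shown as a sketch): only ``None`` and ``pd.NA`` are flagged, while ``np.nan`` and ``pd.NaT`` are left alone, which is the whole point of the POC:

```python
import numpy as np
import pandas as pd

from pandas._libs.missing import is_pdna_or_none

values = np.array([pd.NA, None, np.nan, pd.NaT, 1.5], dtype=object)
print(is_pdna_or_none(values))
# [ True  True False False False] -- NaN/NaT count as values, not missing
```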
2 changes: 1 addition & 1 deletion pandas/_libs/parsers.pyx
@@ -1456,7 +1456,7 @@ def _maybe_upcast(
if isinstance(arr, IntegerArray) and arr.isna().all():
# use null instead of int64 in pyarrow
arr = arr.to_numpy(na_value=None)
arr = ArrowExtensionArray(pa.array(arr, from_pandas=True))
arr = ArrowExtensionArray(pa.array(arr))

return arr

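The change above is one instance of a pattern repeated throughout the PR: dropping ``from_pandas=True`` so pyarrow stops coercing float ``NaN`` to null. A standalone pyarrow sketch of the difference:

```python
import pyarrow as pa

vals = [1.0, float("nan")]
print(pa.array(vals, from_pandas=True).null_count)  # 1 -- NaN became null
print(pa.array(vals).null_count)                    # 0 -- NaN kept as a value
```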
15 changes: 13 additions & 2 deletions pandas/core/arrays/_utils.py
@@ -7,7 +7,10 @@

import numpy as np

from pandas._config import using_pyarrow_strict_nans

from pandas._libs import lib
from pandas._libs.missing import NA
from pandas.errors import LossySetitemError

from pandas.core.dtypes.cast import np_can_hold_element
@@ -21,7 +24,11 @@


def to_numpy_dtype_inference(
arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool
arr: ArrayLike,
dtype: npt.DTypeLike | None,
na_value,
hasna: bool,
is_pyarrow: bool = True,
) -> tuple[npt.DTypeLike, Any]:
if dtype is None and is_numeric_dtype(arr.dtype):
dtype_given = False
Expand All @@ -34,7 +41,11 @@ def to_numpy_dtype_inference(
else:
dtype = arr.dtype.numpy_dtype # type: ignore[union-attr]
if na_value is lib.no_default:
na_value = np.nan
if is_pyarrow and using_pyarrow_strict_nans():
na_value = NA
dtype = np.dtype(object)
else:
na_value = np.nan
else:
dtype = arr.dtype.numpy_dtype # type: ignore[union-attr]
elif dtype is not None:
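A sketch of the intended effect on the ``to_numpy()`` default for pyarrow-backed arrays when the mode is active (option name assumed as above; this is the POC's intent, not released behavior):

```python
import pandas as pd

arr = pd.array([1.5, None], dtype="float64[pyarrow]")
with pd.option_context("mode.pyarrow_strict_nans", True):
    res = arr.to_numpy()  # no explicit dtype or na_value
# Missing values now surface as pd.NA in an object array rather than
# being coerced to np.nan in a float64 array.
print(res.dtype, res[1] is pd.NA)  # expected: object True
```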
100 changes: 86 additions & 14 deletions pandas/core/arrays/arrow/array.py
@@ -15,7 +15,10 @@

import numpy as np

from pandas._config import using_pyarrow_strict_nans

from pandas._libs import lib
from pandas._libs.missing import is_pdna_or_none
from pandas._libs.tslibs import (
Timedelta,
Timestamp,
@@ -63,6 +66,7 @@
from pandas.core.arrays.masked import BaseMaskedArray
from pandas.core.arrays.string_ import StringDtype
import pandas.core.common as com
from pandas.core.construction import extract_array
from pandas.core.indexers import (
check_array_indexer,
unpack_tuple_and_ellipses,
@@ -322,6 +326,11 @@ def _from_sequence_of_strings(
"""
Construct a new ExtensionArray from a sequence of strings.
"""
mask = isna(strings)

if isinstance(strings, cls):
strings = strings._pa_array

pa_type = to_pyarrow_type(dtype)
if (
pa_type is None
@@ -340,22 +349,27 @@
from pandas.core.tools.datetimes import to_datetime

scalars = to_datetime(strings, errors="raise").date

scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)

elif pa.types.is_duration(pa_type):
from pandas.core.tools.timedeltas import to_timedelta

scalars = to_timedelta(strings, errors="raise")

if pa_type.unit != "ns":
# GH51175: test_from_sequence_of_strings_pa_array
# attempt to parse as int64 reflecting pyarrow's
# duration to string casting behavior
mask = isna(scalars)
if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
strings = pa.array(strings, type=pa.string(), from_pandas=True)
strings = pa.array(strings, type=pa.string(), mask=mask)
strings = pc.if_else(mask, None, strings)
try:
scalars = strings.cast(pa.int64())
except pa.ArrowInvalid:
pass

elif pa.types.is_time(pa_type):
from pandas.core.tools.times import to_time

@@ -371,7 +385,7 @@ def _from_sequence_of_strings(
if isinstance(strings, (pa.Array, pa.ChunkedArray)):
scalars = strings
else:
scalars = pa.array(strings, type=pa.string(), from_pandas=True)
scalars = pa.array(strings, type=pa.string(), mask=mask)
scalars = pc.if_else(pc.equal(scalars, "1.0"), "1", scalars)
scalars = pc.if_else(pc.equal(scalars, "0.0"), "0", scalars)
scalars = scalars.cast(pa.bool_())
Expand All @@ -383,6 +397,19 @@ def _from_sequence_of_strings(
from pandas.core.tools.numeric import to_numeric

scalars = to_numeric(strings, errors="raise")
if not pa.types.is_decimal(pa_type) and isinstance(
strings, (pa.Array, pa.ChunkedArray)
):
# TODO: figure out why doing this cast breaks with decimal dtype
# in test_from_sequence_of_strings_pa_array
mask = strings.is_null()
scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
# TODO: could we just do strings.cast(pa_type)?
elif isinstance(strings, (pa.Array, pa.ChunkedArray)):
scalars = strings.cast(pa_type)
elif mask is not None:
scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)

else:
raise NotImplementedError(
f"Converting strings to {pa_type} is not implemented."
@@ -425,7 +452,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
"""
if isinstance(value, pa.Scalar):
pa_scalar = value
elif isna(value):
elif isna(value) and not lib.is_float(value):
pa_scalar = pa.scalar(None, type=pa_type)
else:
# Workaround https://github.com/apache/arrow/issues/37291
@@ -442,7 +469,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
value = value.as_unit(pa_type.unit)
value = value._value

pa_scalar = pa.scalar(value, type=pa_type, from_pandas=True)
pa_scalar = pa.scalar(value, type=pa_type)

if pa_type is not None and pa_scalar.type != pa_type:
pa_scalar = pa_scalar.cast(pa_type)
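With the new ``not lib.is_float(value)`` guard in ``_box_pa_scalar``, a float ``NaN`` no longer takes the ``pa.scalar(None, ...)`` path. A sketch of the intended boxing (``_box_pa_scalar`` is internal API):

```python
import pyarrow as pa

from pandas.arrays import ArrowExtensionArray

s = ArrowExtensionArray._box_pa_scalar(float("nan"), pa_type=pa.float64())
print(s.is_valid)  # True -- boxed as a NaN value, not a null scalar
```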
@@ -474,6 +501,13 @@ def _box_pa_array(
if copy:
value = value.copy()
pa_array = value.__arrow_array__()

elif hasattr(value, "__arrow_array__"):
# e.g. StringArray
if copy:
value = value.copy()
pa_array = value.__arrow_array__()

else:
if (
isinstance(value, np.ndarray)
@@ -500,19 +534,52 @@
value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
value = value.to_numpy()

if pa_type is not None and pa.types.is_timestamp(pa_type):
# Use DatetimeArray to exclude Decimal(NaN) (GH#61774) and
# ensure constructor treats tznaive the same as non-pyarrow
# dtypes (GH#61775)
from pandas.core.arrays.datetimes import (
DatetimeArray,
tz_to_dtype,
)

pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit)
value = extract_array(value, extract_numpy=True)
if isinstance(value, DatetimeArray):
dta = value
else:
dta = DatetimeArray._from_sequence(
value, copy=copy, dtype=pass_dtype
)
dta_mask = dta.isna()
value_i8 = cast("npt.NDArray", dta.view("i8"))
if not value_i8.flags["WRITEABLE"]:
# e.g. test_setitem_frame_2d_values
value_i8 = value_i8.copy()
dta = DatetimeArray._from_sequence(value_i8, dtype=dta.dtype)
value_i8[dta_mask] = 0 # GH#61776 avoid __sub__ overflow
pa_array = pa.array(dta._ndarray, type=pa_type, mask=dta_mask)
return pa_array

mask = None
if getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf":
arr_value = np.asarray(value, dtype=object)
# similar to isna(value) but excluding NaN, NaT, nat-like, nan-like
mask = is_pdna_or_none(arr_value)

try:
pa_array = pa.array(value, type=pa_type, from_pandas=True)
pa_array = pa.array(value, type=pa_type, mask=mask)
except (pa.ArrowInvalid, pa.ArrowTypeError):
# GH50430: let pyarrow infer type, then cast
pa_array = pa.array(value, from_pandas=True)
pa_array = pa.array(value, mask=mask)

if pa_type is None and pa.types.is_duration(pa_array.type):
# Workaround https://github.com/apache/arrow/issues/37291
from pandas.core.tools.timedeltas import to_timedelta

value = to_timedelta(value)
value = value.to_numpy()
pa_array = pa.array(value, type=pa_type, from_pandas=True)
pa_array = pa.array(value, type=pa_type)

if pa.types.is_duration(pa_array.type) and pa_array.null_count > 0:
# GH52843: upstream bug for duration types when originally
@@ -1159,7 +1226,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
if not len(values):
return np.zeros(len(self), dtype=bool)

result = pc.is_in(self._pa_array, value_set=pa.array(values, from_pandas=True))
result = pc.is_in(self._pa_array, value_set=pa.array(values))
# pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
# to False
return np.array(result, dtype=np.bool_)
@@ -1440,7 +1507,7 @@ def to_numpy(
pa.types.is_floating(pa_type)
and (
na_value is np.nan
or (original_na_value is lib.no_default and is_float_dtype(dtype))
or (
original_na_value is lib.no_default
and is_float_dtype(dtype)
and not using_pyarrow_strict_nans()
)
)
):
result = data._pa_array.to_numpy()
@@ -1966,7 +2037,7 @@ def __setitem__(self, key, value) -> None:
raise ValueError("Length of indexer and values mismatch")
chunks = [
*self._pa_array[:key].chunks,
pa.array([value], type=self._pa_array.type, from_pandas=True),
pa.array([value], type=self._pa_array.type),
*self._pa_array[key + 1 :].chunks,
]
data = pa.chunked_array(chunks).combine_chunks()
@@ -2020,7 +2091,7 @@ def _rank_calc(
pa_type = pa.float64()
else:
pa_type = pa.uint64()
result = pa.array(ranked, type=pa_type, from_pandas=True)
result = pa.array(ranked, type=pa_type)
return result

data = self._pa_array.combine_chunks()
@@ -2272,7 +2343,7 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]:
right, right_type = _to_numpy_and_type(right)
pa_type = left_type or right_type
result = np.where(cond, left, right)
return pa.array(result, type=pa_type, from_pandas=True)
return pa.array(result, type=pa_type)

@classmethod
def _replace_with_mask(
@@ -2313,9 +2384,10 @@ def _replace_with_mask(
replacements = np.array(replacements, dtype=object)
elif isinstance(replacements, pa.Scalar):
replacements = replacements.as_py()

result = np.array(values, dtype=object)
result[mask] = replacements
return pa.array(result, type=values.type, from_pandas=True)
return pa.array(result, type=values.type)

# ------------------------------------------------------------------
# GroupBy Methods
@@ -2394,7 +2466,7 @@ def _groupby_op(
return type(self)(pa_result)
else:
# DatetimeArray, TimedeltaArray
pa_result = pa.array(result, from_pandas=True)
pa_result = pa.array(result)
return type(self)(pa_result)

def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
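A sketch of the wall-time semantics the timestamp branch of ``_box_pa_array`` above is after (GH#61775): naive datetime inputs should be localized to the dtype's timezone, matching :class:`DatetimeTZDtype`, rather than being read as UTC instants:

```python
import datetime

import pandas as pd

naive = datetime.datetime(2025, 1, 1, 12, 0)
arr = pd.array([naive], dtype="timestamp[us, tz=US/Eastern][pyarrow]")
# expected under this PR: 12:00 wall time in US/Eastern (-05:00), the
# same as pd.array([naive], dtype="datetime64[us, US/Eastern]")
print(arr[0])
```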
8 changes: 8 additions & 0 deletions pandas/core/arrays/base.py
@@ -2539,6 +2539,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if result is not NotImplemented:
return result

# TODO: putting this here is hacky as heck
if self.dtype == "float64[pyarrow]":
# e.g. test_log_arrow_backed_missing_value
new_inputs = [
x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
]
return getattr(ufunc, method)(*new_inputs, **kwargs)

return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
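A sketch of what the ``float64[pyarrow]`` special case above enables (cf. the referenced ``test_log_arrow_backed_missing_value``): the pyarrow-backed array is handed to the ufunc as numpy floats with ``np.nan`` standing in for missing entries:

```python
import numpy as np
import pandas as pd

arr = pd.array([1.0, np.e, None], dtype="float64[pyarrow]")
# The ufunc sees to_numpy(na_value=np.nan), i.e. [1.0, 2.718..., nan],
# so a plain float64 ndarray comes back.
print(np.log(arr))  # expected: [0. 1. nan]
```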
4 changes: 3 additions & 1 deletion pandas/core/arrays/masked.py
@@ -484,7 +484,9 @@ def to_numpy(
array([ True, False, False])
"""
hasna = self._hasna
dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
dtype, na_value = to_numpy_dtype_inference(
self, dtype, na_value, hasna, is_pyarrow=False
)
if dtype is None:
dtype = object

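Because masked arrays pass ``is_pyarrow=False``, their ``to_numpy()`` default is untouched by the new mode; a sketch:

```python
import pandas as pd

arr = pd.array([1.0, None], dtype="Float64")  # masked, not pyarrow-backed
with pd.option_context("mode.pyarrow_strict_nans", True):
    print(arr.to_numpy())  # [ 1. nan] -- still float64 with np.nan
```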
8 changes: 7 additions & 1 deletion pandas/core/arrays/string_.py
@@ -481,6 +481,12 @@ def _str_map_str_or_object(
if self.dtype.storage == "pyarrow":
import pyarrow as pa

# TODO: shouldn't this already be caught by the passed mask?
# it isn't in test_extract_expand_capture_groups_index
# mask = mask | np.array(
# [x is libmissing.NA for x in result], dtype=bool
# )

result = pa.array(
result, mask=mask, type=pa.large_string(), from_pandas=True
)
@@ -733,7 +739,7 @@ def __arrow_array__(self, type=None):

values = self._ndarray.copy()
values[self.isna()] = None
return pa.array(values, type=type, from_pandas=True)
return pa.array(values, type=type)

def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override]
arr = self._ndarray
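The ``__arrow_array__`` change above uses the idiom adopted throughout the PR: mark missing positions explicitly (here with ``None``, elsewhere with ``mask=``) instead of letting ``from_pandas=True`` infer them. A standalone sketch:

```python
import numpy as np
import pyarrow as pa

values = np.array(["a", "b", None], dtype=object)
print(pa.array(values, type=pa.string()).null_count)  # 1 -- explicit None
```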