From e4e5d9959445b16924c2c69782156c975c012cb9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jul 2023 09:05:43 -0700 Subject: [PATCH 1/4] REF: de-duplicate ensure_np_dtype --- pandas/core/internals/array_manager.py | 16 +++------------- pandas/core/internals/base.py | 18 ++++++++++++++++++ pandas/core/internals/managers.py | 12 ++---------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 3b77540efcdd2..14969425e75a7 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -35,11 +35,7 @@ is_object_dtype, is_timedelta64_ns_dtype, ) -from pandas.core.dtypes.dtypes import ( - ExtensionDtype, - NumpyEADtype, - SparseDtype, -) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCSeries, @@ -75,6 +71,7 @@ from pandas.core.internals.base import ( DataManager, SingleDataManager, + ensure_np_dtype, interleaved_dtype, ) from pandas.core.internals.blocks import ( @@ -1021,14 +1018,7 @@ def as_array( if not dtype: dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) - if isinstance(dtype, SparseDtype): - dtype = dtype.subtype - elif isinstance(dtype, NumpyEADtype): - dtype = dtype.numpy_dtype - elif isinstance(dtype, ExtensionDtype): - dtype = np.dtype("object") - elif dtype == np.dtype(str): - dtype = np.dtype("object") + dtype = ensure_np_dtype(dtype) result = np.empty(self.shape_proper, dtype=dtype) diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index bd5792ef34f6a..677dd369fa4ee 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -8,6 +8,7 @@ TYPE_CHECKING, Any, Literal, + cast, final, ) @@ -26,6 +27,10 @@ find_common_type, np_can_hold_element, ) +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + SparseDtype, +) from pandas.core.base import PandasObject from pandas.core.construction import extract_array @@ -356,3 +361,16 @@ def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: return None return find_common_type(dtypes) + + +def ensure_np_dtype(dtype: DtypeObj) -> np.dtype: + # TODO: https://github.com/pandas-dev/pandas/issues/22791 + # Give EAs some input on what happens here. Sparse needs this. + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + dtype = cast(np.dtype, dtype) + elif isinstance(dtype, ExtensionDtype): + dtype = np.dtype("object") + elif dtype == np.dtype(str): + dtype = np.dtype("object") + return dtype diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c19300a8f3d9c..9f19816fd34b7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -65,6 +65,7 @@ from pandas.core.internals.base import ( DataManager, SingleDataManager, + ensure_np_dtype, interleaved_dtype, ) from pandas.core.internals.blocks import ( @@ -1666,16 +1667,7 @@ def _interleave( [blk.dtype for blk in self.blocks] ) - # TODO: https://github.com/pandas-dev/pandas/issues/22791 - # Give EAs some input on what happens here. Sparse needs this. - if isinstance(dtype, SparseDtype): - dtype = dtype.subtype - dtype = cast(np.dtype, dtype) - elif isinstance(dtype, ExtensionDtype): - dtype = np.dtype("object") - elif dtype == np.dtype(str): - dtype = np.dtype("object") - + dtype = ensure_np_dtype(dtype) result = np.empty(self.shape, dtype=dtype) itemmask = np.zeros(self.shape[0]) From 0399b0b035c9066193d4d35d9d1abbcf45297b06 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jul 2023 13:35:55 -0700 Subject: [PATCH 2/4] REF: simplify as_array --- pandas/core/internals/managers.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 9f19816fd34b7..89b7aa76e645f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1624,16 +1624,12 @@ def as_array( arr = blk.values.to_numpy( # type: ignore[union-attr] dtype=dtype, na_value=na_value, + copy=copy, ).reshape(blk.shape) else: - arr = np.asarray(blk.get_values()) - if dtype: - arr = arr.astype(dtype, copy=copy) - copy = False + arr = np.array(blk.values, dtype=dtype, copy=copy) - if copy: - arr = arr.copy() - elif using_copy_on_write(): + if using_copy_on_write(): arr = arr.view() arr.flags.writeable = False else: From f88207334540232a0a1b3fd36e0f0a98486110de Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Jul 2023 13:59:12 -0700 Subject: [PATCH 3/4] mypy fixup --- pandas/core/internals/managers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2e347b19a181b..f06f6eabdcf3c 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1663,7 +1663,9 @@ def _interleave( [blk.dtype for blk in self.blocks] ) - dtype = ensure_np_dtype(dtype) + # error: Argument 1 to "ensure_np_dtype" has incompatible type + # "Optional[dtype[Any]]"; expected "Union[dtype[Any], ExtensionDtype]" + dtype = ensure_np_dtype(dtype) # type: ignore[arg-type] result = np.empty(self.shape, dtype=dtype) itemmask = np.zeros(self.shape[0]) From 0eae2b82ad16b2384bc7f72bc3cb118570202b71 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 30 Jul 2023 13:17:37 -0700 Subject: [PATCH 4/4] Missing check in CoW case --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f06f6eabdcf3c..2146d1f2cef16 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1629,7 +1629,7 @@ def as_array( else: arr = np.array(blk.values, dtype=dtype, copy=copy) - if using_copy_on_write(): + if using_copy_on_write() and not copy: arr = arr.view() arr.flags.writeable = False else: