From 02126ddc21ca510e8a0652431abb6df7028ca12e Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 18:12:59 +0200 Subject: [PATCH 01/12] BUG: Fix implicit conversion to float64 with isin() --- doc/source/reference/arrays.rst | 1 + doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/algorithms.py | 6 +-- pandas/core/dtypes/api.py | 2 + pandas/core/dtypes/common.py | 75 +++++++++++++++++++++++++++++++++ pandas/tests/api/test_api.py | 1 + pandas/tests/api/test_types.py | 1 + pandas/tests/test_algos.py | 7 +++ 8 files changed, 90 insertions(+), 4 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index d37eebef5c0c0..fd3bbff1843a3 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -667,6 +667,7 @@ Data type introspection api.types.is_dtype_equal api.types.is_extension_array_dtype api.types.is_float_dtype + api.types.is_implicit_conversion_to_float64 api.types.is_int64_dtype api.types.is_integer_dtype api.types.is_interval_dtype diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5c53267158eab..f3cbe9126a443 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -404,6 +404,7 @@ Other API changes - Index set operations (like union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining the dtype of the resulting Index (:issue:`60797`) +- Added :func:`pandas.api.types.is_implicit_conversion_to_float64` to check if there is a silent conversion to float64 between two dtypes(:issue:`61676`) .. --------------------------------------------------------------------------- .. 
_whatsnew_300.deprecations: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7fc391d3ffb51..d3b276e55cd42 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -47,15 +47,14 @@ is_bool_dtype, is_complex_dtype, is_dict_like, - is_dtype_equal, is_extension_array_dtype, is_float, is_float_dtype, + is_implicit_conversion_to_float64, is_integer, is_integer_dtype, is_list_like, is_object_dtype, - is_signed_integer_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -511,8 +510,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: if ( len(values) > 0 and values.dtype.kind in "iufcb" - and not is_signed_integer_dtype(comps) - and not is_dtype_equal(values, comps) + and is_implicit_conversion_to_float64(values, comps) ): # GH#46485 Use object to avoid upcast to float64 later # TODO: Share with _find_common_type_compat diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index e66104d6afcd9..3f504228b15e7 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -17,6 +17,7 @@ is_float, is_float_dtype, is_hashable, + is_implicit_conversion_to_float64, is_int64_dtype, is_integer, is_integer_dtype, @@ -59,6 +60,7 @@ "is_float", "is_float_dtype", "is_hashable", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 68d99937f728c..595dcddea463e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -713,6 +713,80 @@ def is_dtype_equal(source, target) -> bool: return False +def is_implicit_conversion_to_float64(source, target) -> bool: + """ + Check if there is an implicit conversion to float64 with both dtypes. + + Parameters + ---------- + source : type or str + The first dtype to compare. + target : type or str + The second dtype to compare. 
+ + Returns + ------- + boolean + Whether or not there is an implicit conversion to float64. + + See AlsoMore actions + -------- + api.types.is_categorical_dtype : Check whether the provided array or dtype + is of the Categorical dtype. + api.types.is_string_dtype : Check whether the provided array or dtype + is of the string dtype. + api.types.is_object_dtype : Check whether an array-like or dtype is of the + object dtype. + + Examples + -------- + >>> from pandas.api.types import is_implicit_conversion_to_float64 + >>> is_implicit_conversion_to_float64(int, float) + False + >>> is_implicit_conversion_to_float64("int", int) + False + >>> import numpy as np + >>> is_implicit_conversion_to_float64(int, np.int64) + False + >>> is_implicit_conversion_to_float64(np.uint64, np.int64) + True + >>> is_implicit_conversion_to_float64(np.uint64, np.float64) + False + >>> is_implicit_conversion_to_float64(np.uint64, np.uint64) + False + >>> is_implicit_conversion_to_float64(np.uint32, np.uint32) + False + >>> is_implicit_conversion_to_float64(np.uint32, np.int32) + False + >>> is_implicit_conversion_to_float64(np.int32, np.int32) + False + >>> is_implicit_conversion_to_float64(object, "category") + False + import pandas as pd + >>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype()) + True + >>> from pandas.core.dtypes.dtypes import CategoricalDtype + >>> is_implicit_conversion_to_float64(CategoricalDtype(), "category") + False + """ + try: + src = _get_dtype(source) + tar = _get_dtype(target) + # check only valid dtypes related to implicit conversion to float64 + # other data types derived from 64-bit integers such as U/Int64Dtype + # should also work + if ( + src.kind in "iu" and src.itemsize == 8 + and tar.kind in "iu" and tar.itemsize == 8 + ): + return src != tar + else: + return False + except (TypeError, AttributeError, ImportError): + # invalid comparison + return False + + def is_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or 
dtype is of an integer dtype. @@ -1934,6 +2008,7 @@ def is_all_strings(value: ArrayLike) -> bool: "is_extension_array_dtype", "is_file_like", "is_float_dtype", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer_dtype", "is_interval_dtype", diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 871e977cbe2f8..24019b8e036a1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -295,6 +295,7 @@ class TestApi(Base): "is_float", "is_float_dtype", "is_hashable", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index bf39370c49d76..659b81a417cb6 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -20,6 +20,7 @@ class TestTypes(Base): "is_dtype_equal", "is_float", "is_float_dtype", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7fb421e27bb40..ec87441e3941a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1197,6 +1197,13 @@ def test_isin_unsigned_dtype(self): expected = Series(False) tm.assert_series_equal(result, expected) + def test_isin_unsigned_dtype_other_side(self): + # GH#46485 + ser = Series([1378774140726870442], dtype=np.int64) + result = ser.isin([np.uint64(1378774140726870528)]) + expected = Series(False) + tm.assert_series_equal(result, expected) + class TestValueCounts: def test_value_counts(self): From 73ad0dc86f9a279f49254c819889a66dafd052e2 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 19:48:49 +0200 Subject: [PATCH 02/12] fix pre-commit --- pandas/core/dtypes/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 595dcddea463e..42224b4c528fc 100644 --- a/pandas/core/dtypes/common.py +++ 
b/pandas/core/dtypes/common.py @@ -776,8 +776,10 @@ def is_implicit_conversion_to_float64(source, target) -> bool: # other data types derived from 64-bit integers such as U/Int64Dtype # should also work if ( - src.kind in "iu" and src.itemsize == 8 - and tar.kind in "iu" and tar.itemsize == 8 + src.kind in "iu" + and src.itemsize == 8 + and tar.kind in "iu" + and tar.itemsize == 8 ): return src != tar else: From e0afe0bde584a59e5c107d7fd5886b311d65ab8f Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 20:44:18 +0200 Subject: [PATCH 03/12] fix type --- pandas/core/dtypes/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 42224b4c528fc..ea152a98a78cc 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -777,9 +777,9 @@ def is_implicit_conversion_to_float64(source, target) -> bool: # should also work if ( src.kind in "iu" - and src.itemsize == 8 + and src.itemsize == 8 # type: ignore[union-attr] and tar.kind in "iu" - and tar.itemsize == 8 + and tar.itemsize == 8 # type: ignore[union-attr] ): return src != tar else: From 802ac84887f9d80c3d912e9d6f51ed0f1ba5a6d0 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 21:17:14 +0200 Subject: [PATCH 04/12] fix typo --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index ea152a98a78cc..dc0f969c2a741 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -729,7 +729,7 @@ def is_implicit_conversion_to_float64(source, target) -> bool: boolean Whether or not there is an implicit conversion to float64. - See AlsoMore actions + See Also -------- api.types.is_categorical_dtype : Check whether the provided array or dtype is of the Categorical dtype. 
From 670a8f9f38b9b0f316e74cff9a6e16bfe95577bd Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 21:40:30 +0200 Subject: [PATCH 05/12] remove import of numpy and pandas --- pandas/core/dtypes/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dc0f969c2a741..9bc3daa151bcb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -745,7 +745,6 @@ def is_implicit_conversion_to_float64(source, target) -> bool: False >>> is_implicit_conversion_to_float64("int", int) False - >>> import numpy as np >>> is_implicit_conversion_to_float64(int, np.int64) False >>> is_implicit_conversion_to_float64(np.uint64, np.int64) @@ -762,7 +761,6 @@ def is_implicit_conversion_to_float64(source, target) -> bool: False >>> is_implicit_conversion_to_float64(object, "category") False - import pandas as pd >>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype()) True >>> from pandas.core.dtypes.dtypes import CategoricalDtype From eaba844bac80b0c261e813dbbf0710ea991829a7 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Wed, 25 Jun 2025 21:50:46 +0200 Subject: [PATCH 06/12] import code into isin() and remove function --- doc/source/reference/arrays.rst | 1 - doc/source/whatsnew/v3.0.0.rst | 1 - pandas/core/algorithms.py | 26 +++++++++--- pandas/core/dtypes/api.py | 2 - pandas/core/dtypes/common.py | 75 --------------------------------- pandas/tests/api/test_api.py | 1 - pandas/tests/api/test_types.py | 1 - 7 files changed, 20 insertions(+), 87 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index fd3bbff1843a3..d37eebef5c0c0 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -667,7 +667,6 @@ Data type introspection api.types.is_dtype_equal api.types.is_extension_array_dtype api.types.is_float_dtype - api.types.is_implicit_conversion_to_float64 api.types.is_int64_dtype api.types.is_integer_dtype 
api.types.is_interval_dtype diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f3cbe9126a443..5c53267158eab 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -404,7 +404,6 @@ Other API changes - Index set operations (like union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining the dtype of the resulting Index (:issue:`60797`) -- Added :func:`pandas.api.types.is_implicit_conversion_to_float64` to check if there is a silent conversion to float64 between two dtypes(:issue:`61676`) .. --------------------------------------------------------------------------- .. _whatsnew_300.deprecations: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index d3b276e55cd42..e687b6f5845c2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -50,7 +50,6 @@ is_extension_array_dtype, is_float, is_float_dtype, - is_implicit_conversion_to_float64, is_integer, is_integer_dtype, is_list_like, @@ -507,11 +506,26 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: orig_values = list(values) values = _ensure_arraylike(orig_values, func_name="isin-targets") - if ( - len(values) > 0 - and values.dtype.kind in "iufcb" - and is_implicit_conversion_to_float64(values, comps) - ): + try: + src = comps.dtype + tar = values.dtype + # check only valid dtypes related to implicit conversion to float64 + # other data types derived from 64-bit integers such as U/Int64Dtype + # should also work + if ( + src.kind in "iu" + and src.itemsize == 8 # type: ignore[union-attr] + and tar.kind in "iu" + and tar.itemsize == 8 # type: ignore[union-attr] + ): + is_implicit_conversion_to_float64 = src != tar + else: + is_implicit_conversion_to_float64 = False + except (TypeError, AttributeError, ImportError): + # invalid comparison + is_implicit_conversion_to_float64 = False + + if (is_implicit_conversion_to_float64): # 
GH#46485 Use object to avoid upcast to float64 later # TODO: Share with _find_common_type_compat values = construct_1d_object_array_from_listlike(orig_values) diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index 3f504228b15e7..e66104d6afcd9 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -17,7 +17,6 @@ is_float, is_float_dtype, is_hashable, - is_implicit_conversion_to_float64, is_int64_dtype, is_integer, is_integer_dtype, @@ -60,7 +59,6 @@ "is_float", "is_float_dtype", "is_hashable", - "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 9bc3daa151bcb..68d99937f728c 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -713,80 +713,6 @@ def is_dtype_equal(source, target) -> bool: return False -def is_implicit_conversion_to_float64(source, target) -> bool: - """ - Check if there is an implicit conversion to float64 with both dtypes. - - Parameters - ---------- - source : type or str - The first dtype to compare. - target : type or str - The second dtype to compare. - - Returns - ------- - boolean - Whether or not there is an implicit conversion to float64. - - See Also - -------- - api.types.is_categorical_dtype : Check whether the provided array or dtype - is of the Categorical dtype. - api.types.is_string_dtype : Check whether the provided array or dtype - is of the string dtype. - api.types.is_object_dtype : Check whether an array-like or dtype is of the - object dtype. 
- - Examples - -------- - >>> from pandas.api.types import is_implicit_conversion_to_float64 - >>> is_implicit_conversion_to_float64(int, float) - False - >>> is_implicit_conversion_to_float64("int", int) - False - >>> is_implicit_conversion_to_float64(int, np.int64) - False - >>> is_implicit_conversion_to_float64(np.uint64, np.int64) - True - >>> is_implicit_conversion_to_float64(np.uint64, np.float64) - False - >>> is_implicit_conversion_to_float64(np.uint64, np.uint64) - False - >>> is_implicit_conversion_to_float64(np.uint32, np.uint32) - False - >>> is_implicit_conversion_to_float64(np.uint32, np.int32) - False - >>> is_implicit_conversion_to_float64(np.int32, np.int32) - False - >>> is_implicit_conversion_to_float64(object, "category") - False - >>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype()) - True - >>> from pandas.core.dtypes.dtypes import CategoricalDtype - >>> is_implicit_conversion_to_float64(CategoricalDtype(), "category") - False - """ - try: - src = _get_dtype(source) - tar = _get_dtype(target) - # check only valid dtypes related to implicit conversion to float64 - # other data types derived from 64-bit integers such as U/Int64Dtype - # should also work - if ( - src.kind in "iu" - and src.itemsize == 8 # type: ignore[union-attr] - and tar.kind in "iu" - and tar.itemsize == 8 # type: ignore[union-attr] - ): - return src != tar - else: - return False - except (TypeError, AttributeError, ImportError): - # invalid comparison - return False - - def is_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of an integer dtype. 
@@ -2008,7 +1934,6 @@ def is_all_strings(value: ArrayLike) -> bool: "is_extension_array_dtype", "is_file_like", "is_float_dtype", - "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer_dtype", "is_interval_dtype", diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 24019b8e036a1..871e977cbe2f8 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -295,7 +295,6 @@ class TestApi(Base): "is_float", "is_float_dtype", "is_hashable", - "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 659b81a417cb6..bf39370c49d76 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -20,7 +20,6 @@ class TestTypes(Base): "is_dtype_equal", "is_float", "is_float_dtype", - "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", From 924f6652e5cecb2de98a1bd8b9cece458deeff24 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Wed, 25 Jun 2025 22:02:33 +0200 Subject: [PATCH 07/12] fix pre-commit --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e687b6f5845c2..849d381a58b95 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -525,7 +525,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: # invalid comparison is_implicit_conversion_to_float64 = False - if (is_implicit_conversion_to_float64): + if is_implicit_conversion_to_float64: # GH#46485 Use object to avoid upcast to float64 later # TODO: Share with _find_common_type_compat values = construct_1d_object_array_from_listlike(orig_values) From 10debda44e9df592a2284fddf7f9b8646956e63d Mon Sep 17 00:00:00 2001 From: pbrochart Date: Wed, 25 Jun 2025 22:11:39 +0200 Subject: [PATCH 08/12] fix type --- pandas/core/algorithms.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 849d381a58b95..f30846274e0bc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -507,8 +507,8 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: values = _ensure_arraylike(orig_values, func_name="isin-targets") try: - src = comps.dtype - tar = values.dtype + src = comps.dtype # type: ignore[union-attr] + tar = values.dtype # type: ignore[union-attr] # check only valid dtypes related to implicit conversion to float64 # other data types derived from 64-bit integers such as U/Int64Dtype # should also work From c8f854c2876c80586444ae7ef0d4c863c78a2ec2 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Wed, 25 Jun 2025 22:28:04 +0200 Subject: [PATCH 09/12] remove unused type --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f30846274e0bc..3c7344b51cf41 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -508,7 +508,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: try: src = comps.dtype # type: ignore[union-attr] - tar = values.dtype # type: ignore[union-attr] + tar = values.dtype # check only valid dtypes related to implicit conversion to float64 # other data types derived from 64-bit integers such as U/Int64Dtype # should also work From fb52fb6d2efffdd33ecd5f0da6bf98685c23027b Mon Sep 17 00:00:00 2001 From: pbrochart Date: Sat, 28 Jun 2025 14:00:16 +0200 Subject: [PATCH 10/12] better patch --- pandas/core/algorithms.py | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3c7344b51cf41..877795028c717 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -506,30 +506,6 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: 
orig_values = list(values) values = _ensure_arraylike(orig_values, func_name="isin-targets") - try: - src = comps.dtype # type: ignore[union-attr] - tar = values.dtype - # check only valid dtypes related to implicit conversion to float64 - # other data types derived from 64-bit integers such as U/Int64Dtype - # should also work - if ( - src.kind in "iu" - and src.itemsize == 8 # type: ignore[union-attr] - and tar.kind in "iu" - and tar.itemsize == 8 # type: ignore[union-attr] - ): - is_implicit_conversion_to_float64 = src != tar - else: - is_implicit_conversion_to_float64 = False - except (TypeError, AttributeError, ImportError): - # invalid comparison - is_implicit_conversion_to_float64 = False - - if is_implicit_conversion_to_float64: - # GH#46485 Use object to avoid upcast to float64 later - # TODO: Share with _find_common_type_compat - values = construct_1d_object_array_from_listlike(orig_values) - elif isinstance(values, ABCMultiIndex): # Avoid raising in extract_array values = np.array(values) @@ -581,6 +557,16 @@ def f(c, v): else: common = np_find_common_type(values.dtype, comps_array.dtype) + if ( + values.dtype.kind in "iu" + and comps_array.dtype.kind in "iu" + and common == np.float64 + ): + # GH#46485 + # Let's np_find_common_type do the job and return float64 + # when it cannot do otherwise with integers + # We replace it by an object + common = np.dtype("O") values = values.astype(common, copy=False) comps_array = comps_array.astype(common, copy=False) f = htable.ismember From a2f2bffbc79ffc236f75710dddcddb520cb42e64 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Tue, 1 Jul 2025 11:52:39 +0200 Subject: [PATCH 11/12] another better patch --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/algorithms.py | 10 ---------- pandas/core/dtypes/cast.py | 7 +++++++ pandas/tests/arrays/integer/test_concat.py | 4 ++-- pandas/tests/dtypes/cast/test_find_common_type.py | 15 +++++++++++---- pandas/tests/indexes/interval/test_setops.py | 2 ++ 6 files changed, 23 
insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5c53267158eab..c402f7b6d08b6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -404,6 +404,7 @@ Other API changes - Index set operations (like union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining the dtype of the resulting Index (:issue:`60797`) +- ``np_find_common_type`` will now return ``object`` for mixed ``int64`` and ``uint64`` dtypes to avoid precision loss (:issue:`61676`, :issue:`61688`) .. --------------------------------------------------------------------------- .. _whatsnew_300.deprecations: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 877795028c717..0406252521e6a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -557,16 +557,6 @@ def f(c, v): else: common = np_find_common_type(values.dtype, comps_array.dtype) - if ( - values.dtype.kind in "iu" - and comps_array.dtype.kind in "iu" - and common == np.float64 - ): - # GH#46485 - # Let's np_find_common_type do the job and return float64 - # when it cannot do otherwise with integers - # We replace it by an object - common = np.dtype("O") values = values.astype(common, copy=False) comps_array = comps_array.astype(common, copy=False) f = htable.ismember diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index dae04ba6244d4..2ff0561d53f23 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1418,6 +1418,13 @@ def np_find_common_type(*dtypes: np.dtype) -> np.dtype: # so fall back to object (find_common_dtype did unless there # was only one dtype) common_dtype = np.dtype("O") + elif ( + # Some precision is lost with float64 when handling uint64/int64 + # Use object instead for the common type + all(np.dtype(x).kind in "iu" and np.dtype(x).itemsize == 8 for x in dtypes) + and common_dtype ==
np.float64 + ): + common_dtype = np.dtype("O") except TypeError: common_dtype = np.dtype("O") diff --git a/pandas/tests/arrays/integer/test_concat.py b/pandas/tests/arrays/integer/test_concat.py index feba574da548f..8fc5405be712e 100644 --- a/pandas/tests/arrays/integer/test_concat.py +++ b/pandas/tests/arrays/integer/test_concat.py @@ -14,7 +14,7 @@ (["Int8", "Int16"], "Int16"), (["UInt8", "Int8"], "Int16"), (["Int32", "UInt32"], "Int64"), - (["Int64", "UInt64"], "Float64"), + (["Int64", "UInt64"], "object"), (["Int64", "boolean"], "object"), (["UInt8", "boolean"], "object"), ], @@ -48,7 +48,7 @@ def test_concat_series(to_concat_dtypes, result_dtype): (["Int8", "int16"], "Int16"), (["UInt8", "int8"], "Int16"), (["Int32", "uint32"], "Int64"), - (["Int64", "uint64"], "Float64"), + (["Int64", "uint64"], "object"), (["Int64", "bool"], "object"), (["UInt8", "bool"], "object"), ], diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 83ef7382fbe8a..1169ff967b357 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -31,7 +31,7 @@ ((np.float16, np.float32), np.float32), ((np.float16, np.int16), np.float32), ((np.float32, np.int16), np.float32), - ((np.uint64, np.int64), np.float64), + ((np.uint64, np.int64), object), ((np.int16, np.float64), np.float64), ((np.float16, np.int64), np.float64), # Into others. @@ -155,9 +155,16 @@ def test_interval_dtype(left, right): elif left.subtype.kind in ["i", "u", "f"]: # i.e. 
numeric if right.subtype.kind in ["i", "u", "f"]: - # both numeric -> common numeric subtype - expected = IntervalDtype(np.float64, "right") - assert result == expected + if ( + left.subtype.kind in ["i", "u"] + and right.subtype.kind in ["i", "u"] + and left.subtype.kind != right.subtype.kind + ): + assert result == object + else: + # both numeric -> common numeric subtype + expected = IntervalDtype(np.float64, "right") + assert result == expected else: assert result == object diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index 1b0816a9405cb..15679973cd857 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -55,6 +55,7 @@ def test_union_empty_result(self, closed, sort): tm.assert_index_equal(result, expected) other = empty_index(dtype="uint64", closed=closed) + expected = Index([], dtype='object') result = index.union(other, sort=sort) tm.assert_index_equal(result, expected) @@ -117,6 +118,7 @@ def test_intersection_empty_result(self, closed, sort): tm.assert_index_equal(result, expected) other = monotonic_index(300, 314, dtype="uint64", closed=closed) + expected = Index([], dtype='object') result = index.intersection(other, sort=sort) tm.assert_index_equal(result, expected) From d996cf60accfb10e12fc6c1dab780da41e2bce38 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 1 Jul 2025 09:57:26 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/tests/indexes/interval/test_setops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index 15679973cd857..97f8a662b493d 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -55,7 +55,7 
@@ def test_union_empty_result(self, closed, sort): tm.assert_index_equal(result, expected) other = empty_index(dtype="uint64", closed=closed) - expected = Index([], dtype='object') + expected = Index([], dtype="object") result = index.union(other, sort=sort) tm.assert_index_equal(result, expected) @@ -118,7 +118,7 @@ def test_intersection_empty_result(self, closed, sort): tm.assert_index_equal(result, expected) other = monotonic_index(300, 314, dtype="uint64", closed=closed) - expected = Index([], dtype='object') + expected = Index([], dtype="object") result = index.intersection(other, sort=sort) tm.assert_index_equal(result, expected)