diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5b13e13bb20ba..a7fdd6759ba95 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -408,9 +408,58 @@ def maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ elif issubclass(dtype.type, np.integer): # upcast to prevent overflow - arr = np.asarray(fill_value) - if arr != arr.astype(dtype): - dtype = arr.dtype + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # np.dtype ordering considers: + # int[n] < int[2*n] + # uint[n] < uint[2*n] + # u?int[n] < object_ + dtype = mst + + elif np.can_cast(fill_value, dtype): + pass + + elif dtype.kind == "u" and mst.kind == "i": + dtype = np.promote_types(dtype, mst) + if dtype.kind == "f": + # Case where we disagree with numpy + dtype = np.dtype(np.object_) + + elif dtype.kind == "i" and mst.kind == "u": + + if fill_value > np.iinfo(np.int64).max: + # object is the only way to represent fill_value and keep + # the range allowed by the given dtype + dtype = np.dtype(np.object_) + + elif mst.itemsize < dtype.itemsize: + pass + + elif dtype.itemsize == mst.itemsize: + # We never cast signed to unsigned because that loses + # parts of the original range, so find the smallest signed + # integer that can hold all of `mst`. + ndt = { + np.int64: np.object_, + np.int32: np.int64, + np.int16: np.int32, + np.int8: np.int16, + }[dtype.type] + dtype = np.dtype(ndt) + + else: + # bump to signed integer dtype that holds all of `mst` range + # Note: we have to use itemsize because some (windows) + # builds don't satisfy e.g. 
np.uint32 == np.uint32 + ndt = { + 4: np.int64, + 2: np.int32, + 1: np.int16, # TODO: Test for this case + }[mst.itemsize] + dtype = np.dtype(ndt) + + fill_value = dtype.type(fill_value) + elif issubclass(dtype.type, np.floating): # check if we can cast if _check_lossless_cast(fill_value, dtype): diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index e4e5a22ea6ca0..8d10ed26a80fa 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -151,7 +151,17 @@ def _assert_match(result_fill_value, expected_fill_value): # GH#23982/25425 require the same type in addition to equality/NA-ness res_type = type(result_fill_value) ex_type = type(expected_fill_value) - assert res_type == ex_type + if res_type.__name__ == "uint64": + # No idea why, but these (sometimes) do not compare as equal + assert ex_type.__name__ == "uint64" + elif res_type.__name__ == "ulonglong": + # On some builds we get this instead of np.uint64 + # Note: can't check res_type.dtype.itemsize directly on numpy 1.18 + assert res_type(0).itemsize == 8 + assert ex_type == res_type or ex_type == np.uint64 + else: + # On some builds, type comparison fails, e.g. 
np.int32 != np.int32 + assert res_type == ex_type or res_type.__name__ == ex_type.__name__ match_value = result_fill_value == expected_fill_value @@ -275,26 +285,6 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box): expected_dtype = np.dtype(expected_dtype) boxed, box_dtype = box # read from parametrized fixture - if not boxed: - if expected_dtype == object: - pytest.xfail("overflow error") - if expected_dtype == "int32": - pytest.xfail("always upcasts to platform int") - if dtype == "int8" and expected_dtype == "int16": - pytest.xfail("casts to int32 instead of int16") - if ( - issubclass(dtype.type, np.unsignedinteger) - and np.iinfo(dtype).max < fill_value <= np.iinfo("int64").max - ): - pytest.xfail("falsely casts to signed") - if (dtype, expected_dtype) in [ - ("uint8", "int16"), - ("uint32", "int64"), - ] and fill_value != np.iinfo("int32").min - 1: - pytest.xfail("casts to int32 instead of int8/int16") - # this following xfail is "only" a consequence of the - now strictly - # enforced - principle that maybe_promote_with_scalar always casts - pytest.xfail("wrong return type of fill_value") if boxed: if expected_dtype != object: pytest.xfail("falsely casts to object")