diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 53f254aee2e0e..811b31edf301a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -267,7 +267,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ -- +- Bug in :meth:`DataFrame.unstack` with missing levels led to incorrect index names (:issue:`37510`) - Sparse diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5312dfe84cfd8..bb8f344d4f0f8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1979,6 +1979,9 @@ def remove_unused_levels(self): has_na = int(len(uniques) and (uniques[0] == -1)) if len(uniques) != len(lev) + has_na: + + if lev.isna().any() and len(uniques) == len(lev): + break # We have unused levels changed = True diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 9825bcb0b5d57..e8ae9f6584ad6 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1907,3 +1907,27 @@ def test_unstack_with_missing_int_cast_to_float(self): ), ) tm.assert_frame_equal(result, expected) + + def test_unstack_with_level_has_nan(self): + # GH 37510 + df1 = DataFrame( + { + "L1": [1, 2, 3, 4], + "L2": [3, 4, 1, 2], + "L3": [1, 1, 1, 1], + "x": [1, 2, 3, 4], + } + ) + df1 = df1.set_index(["L1", "L2", "L3"]) + new_levels = ["n1", "n2", "n3", None] + df1.index = df1.index.set_levels(levels=new_levels, level="L1") + df1.index = df1.index.set_levels(levels=new_levels, level="L2") + + result = df1.unstack("L3")[("x", 1)].sort_index().index + expected = MultiIndex( + levels=[["n1", "n2", "n3", None], ["n1", "n2", "n3", None]], + codes=[[0, 1, 2, 3], [2, 3, 0, 1]], + names=["L1", "L2"], + ) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index e5d178581136b..3de78c5e982d3 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -7,6 +7,7 @@ from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex import pandas._testing as tm +from pandas.core.indexes.frozen import FrozenList def test_sortlevel(idx): @@ -271,3 +272,13 @@ def test_argsort(idx): result = idx.argsort() expected = idx.values.argsort() tm.assert_numpy_array_equal(result, expected) + + +def test_remove_unused_levels_with_nan(): + # GH 37510 + idx = Index([(1, np.nan), (3, 4)]).rename(["id1", "id2"]) + idx = idx.set_levels(["a", np.nan], level="id1") + idx = idx.remove_unused_levels() + result = idx.levels + expected = FrozenList([["a", np.nan], [4]]) + assert str(result) == str(expected)