Fix DataFrame.stack raising for ExtensionDtype columns (GH#43561).

* doc/source/whatsnew/v1.4.0.rst: add the changelog entry under "Reshaping".
* pandas/core/reshape/reshape.py: in the homogeneous extension-dtype branch,
  take N and K from the shape of the selected column subset rather than from
  the whole frame, so the interleaving index matches the concatenated values.
* pandas/tests/frame/test_stack_unstack.py: add test_stack_nullable_dtype,
  covering a homogeneous Int64 frame and a mixed Int64/Float64 frame.

NOTE(review): this patch was recovered from a copy whose newlines and leading
whitespace had been collapsed. Indentation of context lines was reconstructed
and should be confirmed against the target files before applying.

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 8732e1c397ce5..d1e209adb1b8f 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -623,6 +623,8 @@ Reshaping
 - Bug in :func:`crosstab` would fail when inputs are lists or tuples (:issue:`44076`)
 - Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`)
 - Fixed metadata propagation in :meth:`Dataframe.apply` method, consequently fixing the same issue for :meth:`Dataframe.transform`, :meth:`Dataframe.nunique` and :meth:`Dataframe.mode` (:issue:`28283`)
+- Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`)
+-
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 9c7107ab40644..6c6b14653df75 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -745,13 +745,15 @@ def _convert_level_number(level_num, columns):
             if frame._is_homogeneous_type and is_extension_array_dtype(
                 frame.dtypes.iloc[0]
             ):
+                # TODO(EA2D): won't need special case, can go through .values
+                # paths below (might change to ._values)
                 dtype = this[this.columns[loc]].dtypes.iloc[0]
                 subset = this[this.columns[loc]]
 
                 value_slice = dtype.construct_array_type()._concat_same_type(
                     [x._values for _, x in subset.items()]
                 )
-                N, K = this.shape
+                N, K = subset.shape
                 idx = np.arange(N * K).reshape(K, N).T.ravel()
                 value_slice = value_slice.take(idx)
 
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 404baecdfecac..62512249dabfc 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -2099,3 +2099,27 @@ def test_stack_unsorted(self):
         result = DF.stack(["VAR", "TYP"]).sort_index()
         expected = DF.sort_index(axis=1).stack(["VAR", "TYP"]).sort_index()
         tm.assert_series_equal(result, expected)
+
+    def test_stack_nullable_dtype(self):
+        # GH#43561
+        columns = MultiIndex.from_product(
+            [["54511", "54515"], ["r", "t_mean"]], names=["station", "element"]
+        )
+        index = Index([1, 2, 3], name="time")
+
+        arr = np.array([[50, 226, 10, 215], [10, 215, 9, 220], [305, 232, 111, 220]])
+        df = DataFrame(arr, columns=columns, index=index, dtype=pd.Int64Dtype())
+
+        result = df.stack("station")
+
+        expected = df.astype(np.int64).stack("station").astype(pd.Int64Dtype())
+        tm.assert_frame_equal(result, expected)
+
+        # non-homogeneous case
+        df[df.columns[0]] = df[df.columns[0]].astype(pd.Float64Dtype())
+        result = df.stack("station")
+
+        # TODO(EA2D): we get object dtype because DataFrame.values can't
+        # be an EA
+        expected = df.astype(object).stack("station")
+        tm.assert_frame_equal(result, expected)