diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 6ace245a4bae1..3f29b0b7b31fb 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1304,6 +1304,7 @@ Groupby/Resample/Rolling
 - :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`)
 - Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`)
 - Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`)
+- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames its columns (:issue:`23455`)
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5d9a5616e133b..451f1199ac8e6 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -586,14 +586,19 @@ def _choose_path(self, fast_path, slow_path, group):
         try:
             res_fast = fast_path(group)
 
-            # compare that we get the same results
+            # verify fast path does not change columns (and names), otherwise
+            # its results cannot be joined with those of the slow path;
+            # Index.equals yields a single bool; ``!=`` compares elementwise and
+            # its array result raises in ``if`` once there are several columns
+            if not res_fast.columns.equals(group.columns):
+                return path, res
+            # verify numerical equality with the slow path
             if res.shape == res_fast.shape:
                 res_r = res.values.ravel()
                 res_fast_r = res_fast.values.ravel()
                 mask = notna(res_r)
-            if (res_r[mask] == res_fast_r[mask]).all():
-                path = fast_path
-
+                if (res_r[mask] == res_fast_r[mask]).all():
+                    path = fast_path
         except Exception:
             pass
         return path, res
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index aec51afb99ef0..4cf63a321a47a 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -808,3 +808,26 @@ def
test_any_all_np_func(func):
 
     res = df.groupby('key')['val'].transform(func)
     tm.assert_series_equal(res, exp)
+
+
+def test_groupby_transform_rename():
+    # https://github.com/pandas-dev/pandas/issues/23461
+    # the helper relabels DataFrame columns; output must keep 'value'
+    def demean_rename(x):
+        centered = x - x.mean()
+        if isinstance(x, pd.Series):
+            return centered
+
+        relabel = {c: '{}_demeaned'.format(c) for c in centered.columns}
+        return centered.rename(columns=relabel)
+
+    frame = pd.DataFrame({'group': list('ababa'),
+                          'value': [1, 1, 1, 2, 2]})
+    demeaned = [-1. / 3, -0.5, -1. / 3, 0.5, 2. / 3]
+    expected = pd.DataFrame({'value': demeaned})
+
+    # whole-frame transform, then the same function on a single column
+    result = frame.groupby('group').transform(demean_rename)
+    tm.assert_frame_equal(result, expected)
+    result_single = frame.groupby('group').value.transform(demean_rename)
+    tm.assert_series_equal(result_single, expected['value'])