diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index caf3a4281561f..8738b8546375f 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -794,6 +794,7 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`) +- Bug in :meth:`GroupBy.mean` raising ``KeyError`` when column was selected at least twice (:issue:`44924`) - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) diff --git a/pandas/core/base.py b/pandas/core/base.py index 9040414a8f35f..45a9b92d94b62 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -235,7 +235,7 @@ def __getitem__(self, key): raise IndexError(f"Column(s) {self._selection} already selected") if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): - if len(self.obj.columns.intersection(key)) != len(key): + if len(self.obj.columns.intersection(key)) != len(set(key)): bad_keys = list(set(key).difference(self.obj.columns)) raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") return self._gotitem(list(key), ndim=2) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index b9f71fd4ed96a..aea659445801b 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -2,6 +2,7 @@ import random +import numpy as np import pytest import pandas as pd @@ -285,3 +286,20 @@ def test_column_axis(column_group_df): expected = column_group_df.iloc[:, [1, 3]] tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", [list, pd.Index, pd.Series, np.array]) +def test_groupby_duplicated_columns(func): + # GH#44924 + df = pd.DataFrame( + { + "A": [1, 2], + "B": [3, 3], + "C": ["G", "G"], + } + ) + result = df.groupby("C")[func(["A", "B", "A"])].mean() + expected = pd.DataFrame( + [[1.5, 3.0, 1.5]], columns=["A", "B", "A"], index=pd.Index(["G"], name="C") + ) + tm.assert_frame_equal(result, expected)