Closed
Description
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandas.
- (optional) I have confirmed this bug exists on the master branch of pandas.
Code Sample
In [1]: df = pd.DataFrame(dict(a=[nan], b=[0], c=[1]))
In [2]: df
a b c
0 NaN 0 1
# groupby drops groups with null by default
In [3]: df.groupby(['a', 'b']).first()
Empty DataFrame
Columns: [c]
Index: []
# pass dropna=False to keep groups with nulls
In [4]: df.groupby(['a', 'b'], dropna=False).first()
c
a b
NaN 0 1
# This doesn't work if the groups are in a multi index.
In [5]: df.set_index(['a', 'b']).groupby(['a', 'b'], dropna=False).first()
Empty DataFrame
Columns: [c]
Index: []
Problem description
It seems that passing `dropna=False` to `groupby` has no effect when the group keys are levels of a MultiIndex rather than regular columns: the group containing NaN is still dropped, as in `[5]` above. I'm not sure whether this is intentional, but it is certainly confusing.
Expected Output
I would expect the output of [5] above to match that of [4].
Output of pd.show_versions()
{
"system": {
"commit": "7d32926db8f7541c356066dcadabf854487738de",
"python": "3.8.5.final.0",
"python-bits": 64,
"OS": "Linux",
"OS-release": "5.4.0-1035-aws",
"Version": "#37-Ubuntu SMP Wed Jan 6 21:01:57 UTC 2021",
"machine": "x86_64",
"processor": "x86_64",
"byteorder": "little",
"LC_ALL": "C.UTF-8",
"LANG": "C.UTF-8",
"LOCALE": {
"language-code": "en_US",
"encoding": "UTF-8"
}
},
"dependencies": {
"pandas": "1.2.2",
"numpy": "1.20.1",
"pytz": "2021.1",
"dateutil": "2.8.1",
"pip": "20.3.3",
"setuptools": "51.3.3",
"Cython": null,
"pytest": null,
"hypothesis": null,
"sphinx": null,
"blosc": null,
"feather": null,
"xlsxwriter": null,
"lxml.etree": null,
"html5lib": null,
"pymysql": null,
"psycopg2": null,
"jinja2": null,
"IPython": "7.20.0",
"pandas_datareader": null,
"bs4": "4.9.3",
"bottleneck": null,
"fsspec": null,
"fastparquet": "0.5.0",
"gcsfs": null,
"matplotlib": null,
"numexpr": null,
"odfpy": null,
"openpyxl": null,
"pandas_gbq": null,
"pyarrow": "3.0.0",
"pyxlsb": null,
"s3fs": null,
"scipy": null,
"sqlalchemy": null,
"tables": null,
"tabulate": null,
"xarray": null,
"xlrd": null,
"xlwt": null,
"numba": "0.52.0"
}
}