diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b1620df91ba26..7b1a9d8ff6ae3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -412,6 +412,12 @@ def test_constructor_dict_order_insertion(self): expected = DataFrame(data=d, columns=list("ba")) tm.assert_frame_equal(frame, expected) + def test_constructor_dict_nan_key_and_columns(self): + # GH 16894 + result = pd.DataFrame({np.nan: [1, 2], 2: [2, 3]}, columns=[np.nan, 2]) + expected = pd.DataFrame([[1, 2], [2, 3]], columns=[np.nan, 2]) + tm.assert_frame_equal(result, expected) + def test_constructor_multi_index(self): # GH 4078 # construction error with mi and all-nan frame diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3d842aca210ed..0a7272bbc131c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -630,6 +630,22 @@ def test_lambda_named_agg(func): tm.assert_frame_equal(result, expected) +def test_aggregate_mixed_types(): + # GH 16916 + df = pd.DataFrame( + data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc") + ) + df["grouping"] = ["group 1", "group 1", 2] + result = df.groupby("grouping").aggregate(lambda x: x.tolist()) + expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]] + expected = pd.DataFrame( + expected_data, + index=Index([2, "group 1"], dtype="object", name="grouping"), + columns=Index(["X", "Y", "Z"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + class TestLambdaMangling: def test_basic(self): df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) diff --git a/pandas/tests/indexes/interval/test_indexing.py b/pandas/tests/indexes/interval/test_indexing.py index 1bfc58733a110..87b72f702e2aa 100644 --- a/pandas/tests/indexes/interval/test_indexing.py +++ b/pandas/tests/indexes/interval/test_indexing.py @@ -312,6 +312,18 @@ def test_get_indexer_non_unique_with_int_and_float(self, query, expected): # TODO we may also want to test get_indexer for the case when # the intervals are duplicated, decreasing, non-monotonic, etc.. + def test_get_indexer_non_monotonic(self): + # GH 16410 + idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)]) + idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)]) + result = idx1.get_indexer(idx2) + expected = np.array([2, 0, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = idx1.get_indexer(idx1[1:]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + class TestSliceLocs: def test_slice_locs_with_interval(self): diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 8ea825da8f94f..c15fa34283f21 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -250,3 +250,13 @@ def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index): ).T result = df["A"]["B2"] tm.assert_frame_equal(result, expected) + + +def test_frame_mi_empty_slice(): + # GH 15454 + df = DataFrame(0, index=range(2), columns=MultiIndex.from_product([[1], [2]])) + result = df[[]] + expected = DataFrame( + index=[0, 1], columns=MultiIndex(levels=[[1], [2]], codes=[[], []]) + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 3b8aa963ac698..b7802d9b8fe0c 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -468,3 +468,22 @@ def test_loc_period_string_indexing(): ), ) tm.assert_series_equal(result, expected) + + +def test_loc_datetime_mask_slicing(): + # GH 16699 + dt_idx = pd.to_datetime(["2017-05-04", "2017-05-05"]) + m_idx = pd.MultiIndex.from_product([dt_idx, dt_idx], names=["Idx1", "Idx2"]) + df = pd.DataFrame( + data=[[1, 2], [3, 4], [5, 6], [7, 6]], index=m_idx, columns=["C1", "C2"] + ) + result = df.loc[(dt_idx[0], (df.index.get_level_values(1) > "2017-05-04")), "C1"] + expected = pd.Series( + [3], + name="C1", + index=MultiIndex.from_tuples( + [(pd.Timestamp("2017-05-04"), pd.Timestamp("2017-05-05"))], + names=["Idx1", "Idx2"], + ), + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 4e3585c0be884..03c1445e099a0 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -230,6 +230,23 @@ def f(x): tm.assert_series_equal(result, expected) +def test_apply_columns_multilevel(): + # GH 16231 + cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")]) + ind = date_range(start="2017-01-01", freq="15Min", periods=8) + df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols) + agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns} + result = df.resample("H").apply(lambda x: agg_dict[x.name](x)) + expected = DataFrame( + np.array([0] * 4).reshape(2, 2), + index=date_range(start="2017-01-01", freq="1H", periods=2), + columns=pd.MultiIndex.from_tuples( + [("A", "a", "", "one"), ("B", "b", "i", "two")] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_resample_groupby_with_label(): # GH 13235 index = date_range("2000-01-01", freq="2D", periods=5) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6850c52ca05ea..fe75aef1ca3d7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2649,6 +2649,46 @@ def test_crosstab_unsorted_order(self): ) tm.assert_frame_equal(result, expected) + def test_crosstab_normalize_multiple_columns(self): + # GH 15150 + df = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": [0] * 24, + "E": [0] * 24, + } + ) + result = pd.crosstab( + [df.A, df.B], + df.C, + values=df.D, + aggfunc=np.sum, + normalize=True, + margins=True, + ) + expected = pd.DataFrame( + np.array([0] * 29 + [1], dtype=float).reshape(10, 3), + columns=Index(["bar", "foo", "All"], dtype="object", name="C"), + index=MultiIndex.from_tuples( + [ + ("one", "A"), + ("one", "B"), + ("one", "C"), + ("three", "A"), + ("three", "B"), + ("three", "C"), + ("two", "A"), + ("two", "B"), + ("two", "C"), + ("All", ""), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + def test_margin_normalize(self): # GH 27500 df = pd.DataFrame(