From cd000174c98c370a8130b139805011cc90a66bee Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 11 Dec 2019 17:04:17 -0800 Subject: [PATCH 1/3] TST: tests for needs-test issues --- pandas/tests/frame/test_axis_select_reindex.py | 8 ++++++++ pandas/tests/frame/test_constructors.py | 8 +++++++- pandas/tests/frame/test_quantile.py | 10 ++++++++++ pandas/tests/frame/test_timeseries.py | 11 +++++++++++ pandas/tests/indexes/multi/test_constructor.py | 11 +++++++++++ pandas/tests/io/formats/test_format.py | 9 +++++++++ pandas/tests/series/test_missing.py | 7 +++++++ pandas/tests/window/test_rolling.py | 8 ++++++++ 8 files changed, 71 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 16dfae847e0eb..d6ef3a7600abb 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -929,6 +929,14 @@ def test_filter_corner(self): result = empty.filter(like="foo") tm.assert_frame_equal(result, empty) + def test_filter_regex_non_string(self): + # GH#5798 trying to filter on non-string columns should drop, + # not raise + df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) + result = df.filter(regex="STRING") + expected = df[["STRING"]] + tm.assert_frame_equal(result, expected) + def test_take(self, float_frame): # homogeneous order = [3, 1, 2, 0] diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 08dbeb9e585f1..90ff7a585a323 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1726,10 +1726,16 @@ def test_constructor_with_datetimes(self): tm.assert_frame_equal(df, expected) def test_constructor_datetimes_with_nulls(self): - # gh-15869 + # gh-15869, GH#11220 for arr in [ np.array([None, None, None, None, datetime.now(), None]), np.array([None, None, datetime.now(), None]), + [[np.datetime64("NaT")], [None]], + [[np.datetime64("NaT")], [pd.NaT]], + [[None], [np.datetime64("NaT")]], + [[None], [pd.NaT]], + [[pd.NaT], [np.datetime64("NaT")]], + [[pd.NaT], [None]], ]: result = DataFrame(arr).dtypes expected = Series([np.dtype("datetime64[ns]")]) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 5ca7dd32200ee..c25b24121d481 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -7,6 +7,16 @@ class TestDataFrameQuantile: + def test_quantile_sparse(self): + # GH#17198 + s = pd.Series(pd.SparseArray([1, 2])) + s1 = pd.Series(pd.SparseArray([3, 4])) + df = pd.DataFrame({0: s, 1: s1}) + result = df.quantile() + + expected = pd.Series([1.5, 3.5], name=0.5) + tm.assert_series_equal(result, expected) + def test_quantile(self, datetime_frame): from numpy import percentile diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index cad1fd60ca2a9..b91cb2adebdb7 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -609,6 +609,17 @@ def test_first_last_valid( assert expected_first == df.first_valid_index() assert expected_last == df.last_valid_index() + def test_first_valid_index_all_nan(self): + # GH#9752 Series/DataFrame should both return None, not raise + ser = pd.Series([np.nan]) + df = pd.DataFrame([np.nan]) + + assert ser.first_valid_index() is None + assert df.first_valid_index() is None + + assert ser.iloc[:0].first_valid_index() is None + assert df.iloc[:0].first_valid_index() is None + def test_first_subset(self): ts = tm.makeTimeDataFrame(freq="12h") result = ts.first("10d") diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 90e993a807bd2..a0a097aede1c4 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -577,6 +577,17 @@ def test_from_product_respects_none_names(): tm.assert_index_equal(result, expected) +def test_from_product_readonly(): + # GH#15286 passing read-only array to from_product + a = np.array(range(3)) + b = ["a", "b"] + expected = pd.MultiIndex.from_product([a, b]) + + a.setflags(write=False) + result = pd.MultiIndex.from_product([a, b]) + tm.assert_index_equal(result, expected) + + def test_create_index_existing_name(idx): # GH11193, when an existing index is passed, and a new name is not diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 6fd813c086982..8cd356ca766dd 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1106,6 +1106,15 @@ def test_truncate_with_different_dtypes(self): assert "None" in result assert "NaN" not in result + def test_truncate_with_different_dtypes_multiindex(self): + # GH#13000 + df = pd.DataFrame({"Vals": range(100)}) + frame = pd.concat([df], keys=["Sweep"], names=["Sweep", "Index"]) + result = repr(frame) + + result2 = repr(frame.iloc[:5]) + assert result.startswith(result2) + def test_datetimelike_frame(self): # GH 12211 diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 72f08876e71ae..8e1fee4d542e7 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -473,6 +473,13 @@ def test_fillna_consistency(self): s2[1] = "foo" tm.assert_series_equal(s2, expected) + def test_where_sparse(self): + # GH#17198 make sure we dont get an AttributeError for sp_index + ser = pd.Series(pd.SparseArray([1, 2])) + result = ser.where(ser >= 2, 0) + expected = pd.Series(pd.SparseArray([0, 2])) + tm.assert_series_equal(result, expected) + def test_datetime64tz_fillna_round_issue(self): # GH 14872 diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 72b72b31d8faa..227055eb222f8 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -418,3 +418,11 @@ def test_rolling_window_as_string(): expected = Series(expData, index=Index(days, name="DateCol"), name="metric") tm.assert_series_equal(result, expected) + + +def test_min_periods1(): + # GH#6795 + df = pd.DataFrame([0, 1, 2, 1, 0], columns=["a"]) + result = df["a"].rolling(3, center=True, min_periods=1).max() + expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") + tm.assert_series_equal(result, expected) From 48d720a492b615be327a6c6937d84923d1eeafed Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Dec 2019 08:01:26 -0800 Subject: [PATCH 2/3] update per comments --- pandas/tests/frame/test_timeseries.py | 13 +++++-------- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ pandas/tests/indexes/multi/test_constructor.py | 4 ++-- pandas/tests/io/formats/test_format.py | 2 +- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index b91cb2adebdb7..f6d2f58a63b53 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -609,16 +609,13 @@ def test_first_last_valid( assert expected_first == df.first_valid_index() assert expected_last == df.last_valid_index() - def test_first_valid_index_all_nan(self): + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_first_valid_index_all_nan(self, klass): # GH#9752 Series/DataFrame should both return None, not raise - ser = pd.Series([np.nan]) - df = pd.DataFrame([np.nan]) + obj = klass([np.nan]) - assert ser.first_valid_index() is None - assert df.first_valid_index() is None - - assert ser.iloc[:0].first_valid_index() is None - assert df.iloc[:0].first_valid_index() is None + assert obj.first_valid_index() is None + assert obj.iloc[:0].first_valid_index() is None def test_first_subset(self): ts = tm.makeTimeDataFrame(freq="12h") diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 93d4dc6046735..ef542bd4745f9 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -669,6 +669,19 @@ def test_as_index_series_return_frame(df): tm.assert_frame_equal(result2, expected2) +def test_as_index_selects_column(): + # GH#5764 check that getitem actually selects the column + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + gb = df.groupby("A", as_index=False)["B"] + result = gb.get_group(1) + + expected = pd.Series([2, 4], index=[0, 1], name="B") + tm.assert_series_equal(result, expected) + + result = gb.apply(lambda x: x.cumsum()) + raise NotImplementedError + + def test_as_index_series_column_slice_raises(df): # GH15072 grouped = df.groupby("A", as_index=False) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index a0a097aede1c4..0e4d144c0fd34 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -581,10 +581,10 @@ def test_from_product_readonly(): # GH#15286 passing read-only array to from_product a = np.array(range(3)) b = ["a", "b"] - expected = pd.MultiIndex.from_product([a, b]) + expected = MultiIndex.from_product([a, b]) a.setflags(write=False) - result = pd.MultiIndex.from_product([a, b]) + result = MultiIndex.from_product([a, b]) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 8cd356ca766dd..d8604774777a6 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1108,7 +1108,7 @@ def test_truncate_with_different_dtypes(self): def test_truncate_with_different_dtypes_multiindex(self): # GH#13000 - df = pd.DataFrame({"Vals": range(100)}) + df = DataFrame({"Vals": range(100)}) frame = pd.concat([df], keys=["Sweep"], names=["Sweep", "Index"]) result = repr(frame) From ce531f7b2a386fc449f0c9cdc7fe27e5baa2f6fa Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Dec 2019 10:25:28 -0800 Subject: [PATCH 3/3] revert edit didnt belong --- pandas/tests/groupby/test_groupby.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ef542bd4745f9..93d4dc6046735 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -669,19 +669,6 @@ def test_as_index_series_return_frame(df): tm.assert_frame_equal(result2, expected2) -def test_as_index_selects_column(): - # GH#5764 check that getitem actually selects the column - df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) - gb = df.groupby("A", as_index=False)["B"] - result = gb.get_group(1) - - expected = pd.Series([2, 4], index=[0, 1], name="B") - tm.assert_series_equal(result, expected) - - result = gb.apply(lambda x: x.cumsum()) - raise NotImplementedError - - def test_as_index_series_column_slice_raises(df): # GH15072 grouped = df.groupby("A", as_index=False)