diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 3be45e2d48e19..6aebc23d1c016 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -8,6 +8,8 @@ import pandas as pd from pandas import ( + Categorical, + CategoricalIndex, DataFrame, Index, IntervalIndex, @@ -542,6 +544,33 @@ def test_reset_index_nat_multiindex(self, ix_data, exp_data): expected = DataFrame(exp_data) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]]) + ) + def test_rest_index_multiindex_categorical_with_missing_values(self, codes): + # GH#24206 + + index = MultiIndex( + [CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes + ) + data = {"col": range(len(index))} + df = DataFrame(data=data, index=index) + + expected = DataFrame( + { + "level_0": Categorical.from_codes(codes[0], categories=["A", "B"]), + "level_1": Categorical.from_codes(codes[1], categories=["a", "b"]), + "col": range(4), + } + ) + + res = df.reset_index() + tm.assert_frame_equal(res, expected) + + # roundtrip + res = expected.set_index(["level_0", "level_1"]).reset_index() + tm.assert_frame_equal(res, expected) + @pytest.mark.parametrize( "array, dtype", diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index e4a0d37e3a017..3cd35e900ee06 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -10,13 +10,10 @@ ) from pandas import ( - Categorical, - CategoricalIndex, DataFrame, DatetimeIndex, Index, IntervalIndex, - MultiIndex, Series, Timestamp, cut, @@ -192,32 +189,3 @@ def test_set_reset_index(self): df = df.set_index("B") df = df.reset_index() - - -class TestCategoricalIndex: - @pytest.mark.parametrize( - "codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]]) - ) - def test_reindexing_with_missing_values(self, codes): - # GH 24206 - - index = MultiIndex( - [CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes - ) - data = {"col": range(len(index))} - df = DataFrame(data=data, index=index) - - expected = DataFrame( - { - "level_0": Categorical.from_codes(codes[0], categories=["A", "B"]), - "level_1": Categorical.from_codes(codes[1], categories=["a", "b"]), - "col": range(4), - } - ) - - res = df.reset_index() - tm.assert_frame_equal(res, expected) - - # roundtrip - res = expected.set_index(["level_0", "level_1"]).reset_index() - tm.assert_frame_equal(res, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index cc448279bfce0..2a15875229e12 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -9,7 +9,7 @@ from pandas._libs.tslibs import conversion, timezones import pandas as pd -from pandas import Index, Series, Timestamp, date_range, period_range +from pandas import DataFrame, Index, Series, Timestamp, date_range, period_range import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -17,6 +17,19 @@ class TestSeriesGetitemScalars: + def test_getitem_out_of_bounds_indexerror(self, datetime_series): + # don't segfault, GH#495 + msg = r"index \d+ is out of bounds for axis 0 with size \d+" + with pytest.raises(IndexError, match=msg): + datetime_series[len(datetime_series)] + + def test_getitem_out_of_bounds_empty_rangeindex_keyerror(self): + # GH#917 + # With a RangeIndex, an int key gives a KeyError + ser = Series([], dtype=object) + with pytest.raises(KeyError, match="-1"): + ser[-1] + def test_getitem_keyerror_with_int64index(self): ser = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) @@ -292,11 +305,23 @@ def test_getitem_multilevel_scalar_slice_not_implemented( ser[2000, 3:4] +def test_getitem_dataframe_raises(): + rng = list(range(10)) + ser = Series(10, index=rng) + df = DataFrame(rng, index=rng) + msg = ( + "Indexing a Series with DataFrame is not supported, " + "use the appropriate DataFrame column" + ) + with pytest.raises(TypeError, match=msg): + ser[df > 5] + + def test_getitem_assignment_series_aligment(): # https://github.com/pandas-dev/pandas/issues/37427 # with getitem, when assigning with a Series, it is not first aligned - s = Series(range(10)) + ser = Series(range(10)) idx = np.array([2, 4, 9]) - s[idx] = Series([10, 11, 12]) + ser[idx] = Series([10, 11, 12]) expected = Series([0, 1, 10, 3, 11, 5, 6, 7, 8, 12]) - tm.assert_series_equal(s, expected) + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 8c53ed85a20b3..214694443ba2a 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -163,19 +163,6 @@ def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1): assert result[2] == result_1[2] -def test_getitem_out_of_bounds(datetime_series): - # don't segfault, GH #495 - msg = r"index \d+ is out of bounds for axis 0 with size \d+" - with pytest.raises(IndexError, match=msg): - datetime_series[len(datetime_series)] - - # GH #917 - # With a RangeIndex, an int key gives a KeyError - s = Series([], dtype=object) - with pytest.raises(KeyError, match="-1"): - s[-1] - - def test_getitem_setitem_integers(): # caused bug without test s = Series([1, 2, 3], ["a", "b", "c"]) @@ -260,18 +247,6 @@ def test_setitem_ambiguous_keyerror(): tm.assert_series_equal(s2, expected) -def test_getitem_dataframe(): - rng = list(range(10)) - s = Series(10, index=rng) - df = DataFrame(rng, index=rng) - msg = ( - "Indexing a Series with DataFrame is not supported, " - "use the appropriate DataFrame column" - ) - with pytest.raises(TypeError, match=msg): - s[df > 5] - - def test_setitem(datetime_series, string_series): datetime_series[datetime_series.index[5]] = np.NaN datetime_series[[1, 2, 17]] = np.NaN @@ -296,22 +271,6 @@ def test_setitem(datetime_series, string_series): tm.assert_series_equal(s, expected) -def test_setitem_empty_series(): - # Test for issue #10193 - key = pd.Timestamp("2012-01-01") - series = Series(dtype=object) - series[key] = 47 - expected = Series(47, [key]) - tm.assert_series_equal(series, expected) - - # GH#33573 our index should retain its freq - series = Series([], pd.DatetimeIndex([], freq="D"), dtype=object) - series[key] = 47 - expected = Series(47, pd.DatetimeIndex([key], freq="D")) - tm.assert_series_equal(series, expected) - assert series.index.freq == expected.index.freq - - def test_setitem_dtypes(): # change dtypes # GH 4463 @@ -338,32 +297,13 @@ def test_setitem_dtypes(): tm.assert_series_equal(s, Series([np.nan, 1.0])) -def test_set_value(datetime_series, string_series): - idx = datetime_series.index[10] - res = datetime_series._set_value(idx, 0) - assert res is None - assert datetime_series[idx] == 0 - - # equiv - s = string_series.copy() - res = s._set_value("foobar", 0) - assert res is None - assert s.index[-1] == "foobar" - assert s["foobar"] == 0 - - s = string_series.copy() - s.loc["foobar"] = 0 - assert s.index[-1] == "foobar" - assert s["foobar"] == 0 - - def test_setslice(datetime_series): sl = datetime_series[5:20] assert len(sl) == len(sl.index) assert sl.index.is_unique is True -def test_2d_to_1d_assignment_raises(): +def test_loc_setitem_2d_to_1d_raises(): x = np.random.randn(2, 2) y = Series(range(2)) @@ -611,25 +551,6 @@ def test_loc_setitem(string_series): assert string_series[d2] == 6 -def test_setitem_na(): - # these induce dtype changes - expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) - s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - s[::2] = np.nan - tm.assert_series_equal(s, expected) - - # gets coerced to float, right? - expected = Series([np.nan, 1, np.nan, 0]) - s = Series([True, True, False, False]) - s[::2] = np.nan - tm.assert_series_equal(s, expected) - - expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) - s = Series(np.arange(10)) - s[:5] = np.nan - tm.assert_series_equal(s, expected) - - def test_timedelta_assignment(): # GH 8209 s = Series([], dtype=object) @@ -829,52 +750,11 @@ def test_multilevel_preserve_name(): assert result2.name == s.name -def test_setitem_scalar_into_readonly_backing_data(): - # GH14359: test that you cannot mutate a read only buffer - - array = np.zeros(5) - array.flags.writeable = False # make the array immutable - series = Series(array) - - for n in range(len(series)): - msg = "assignment destination is read-only" - with pytest.raises(ValueError, match=msg): - series[n] = 1 - - assert array[n] == 0 - - -def test_setitem_slice_into_readonly_backing_data(): - # GH14359: test that you cannot mutate a read only buffer - - array = np.zeros(5) - array.flags.writeable = False # make the array immutable - series = Series(array) - - msg = "assignment destination is read-only" - with pytest.raises(ValueError, match=msg): - series[1:3] = 1 - - assert not array.any() - - """ miscellaneous methods """ -def test_pop(): - # GH 6600 - df = DataFrame({"A": 0, "B": np.arange(5, dtype="int64"), "C": 0}) - k = df.iloc[4] - - result = k.pop("B") - assert result == 4 - - expected = Series([0, 0], index=["A", "C"], name=4) - tm.assert_series_equal(k, expected) - - def test_uint_drop(any_int_dtype): # see GH18311 # assigning series.loc[0] = 4 changed series.dtype to int diff --git a/pandas/tests/series/indexing/test_pop.py b/pandas/tests/series/indexing/test_pop.py new file mode 100644 index 0000000000000..7453f98ab3735 --- /dev/null +++ b/pandas/tests/series/indexing/test_pop.py @@ -0,0 +1,13 @@ +from pandas import Series +import pandas._testing as tm + + +def test_pop(): + # GH#6600 + ser = Series([0, 4, 0], index=["A", "B", "C"], name=4) + + result = ser.pop("B") + assert result == 4 + + expected = Series([0, 0], index=["A", "C"], name=4) + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/indexing/test_set_value.py b/pandas/tests/series/indexing/test_set_value.py index ea09646de71c1..61b01720d1e40 100644 --- a/pandas/tests/series/indexing/test_set_value.py +++ b/pandas/tests/series/indexing/test_set_value.py @@ -19,3 +19,24 @@ def test_series_set_value(): expected = Series([1.0, np.nan], index=index) tm.assert_series_equal(s, expected) + + +def test_set_value_dt64(datetime_series): + idx = datetime_series.index[10] + res = datetime_series._set_value(idx, 0) + assert res is None + assert datetime_series[idx] == 0 + + +def test_set_value_str_index(string_series): + # equiv + ser = string_series.copy() + res = ser._set_value("foobar", 0) + assert res is None + assert ser.index[-1] == "foobar" + assert ser["foobar"] == 0 + + ser2 = string_series.copy() + ser2.loc["foobar"] = 0 + assert ser2.index[-1] == "foobar" + assert ser2["foobar"] == 0 diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 967cf5c55845c..b4c5ac0195d26 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,7 +3,15 @@ import numpy as np import pytest -from pandas import MultiIndex, NaT, Series, Timestamp, date_range, period_range +from pandas import ( + DatetimeIndex, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + period_range, +) from pandas.core.indexing import IndexingError import pandas.testing as tm @@ -162,3 +170,71 @@ def test_setitem_callable_other(self): expected = Series([1, 2, inc, 4]) tm.assert_series_equal(ser, expected) + + +class TestSetitemCasting: + def test_setitem_nan_casts(self): + # these induce dtype changes + expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) + ser = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) + ser[::2] = np.nan + tm.assert_series_equal(ser, expected) + + # gets coerced to float, right? + expected = Series([np.nan, 1, np.nan, 0]) + ser = Series([True, True, False, False]) + ser[::2] = np.nan + tm.assert_series_equal(ser, expected) + + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) + ser = Series(np.arange(10)) + ser[:5] = np.nan + tm.assert_series_equal(ser, expected) + + +class TestSetitemWithExpansion: + def test_setitem_empty_series(self): + # GH#10193 + key = Timestamp("2012-01-01") + series = Series(dtype=object) + series[key] = 47 + expected = Series(47, [key]) + tm.assert_series_equal(series, expected) + + def test_setitem_empty_series_datetimeindex_preserves_freq(self): + # GH#33573 our index should retain its freq + series = Series([], DatetimeIndex([], freq="D"), dtype=object) + key = Timestamp("2012-01-01") + series[key] = 47 + expected = Series(47, DatetimeIndex([key], freq="D")) + tm.assert_series_equal(series, expected) + assert series.index.freq == expected.index.freq + + +def test_setitem_scalar_into_readonly_backing_data(): + # GH#14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + for n in range(len(series)): + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[n] = 1 + + assert array[n] == 0 + + +def test_setitem_slice_into_readonly_backing_data(): + # GH#14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[1:3] = 1 + + assert not array.any()