Relocate string-accessor tests out of pandas/tests/strings/test_strings.py.

  * test_casefold is added to pandas/tests/strings/test_case_justify.py,
    between test_swapcase_mixed_object and test_casemethods.
  * test_get_dummies, test_get_dummies_index,
    test_get_dummies_with_name_dummy and
    test_get_dummies_with_name_dummy_index are added to the new file
    pandas/tests/strings/test_get_dummies.py.
  * The same five tests are deleted from pandas/tests/strings/test_strings.py.

The added and removed test bodies are textually the same; no assertion changes.

NOTE(review): the line breaks and leading whitespace below were rebuilt from a
whitespace-collapsed copy of this patch, assuming 4-space indentation. The
context line `lhs.str.cat(rhs)` is assumed to sit at 8 spaces (inside a `with`
block) - confirm against the target tree before applying.

diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py
index d6e2ca7399b4e..24e1f99ad7956 100644
--- a/pandas/tests/strings/test_case_justify.py
+++ b/pandas/tests/strings/test_case_justify.py
@@ -81,6 +81,15 @@ def test_swapcase_mixed_object():
     tm.assert_series_equal(result, expected)
 
 
+def test_casefold():
+    # GH25405
+    expected = Series(["ss", np.nan, "case", "ssd"])
+    s = Series(["ß", np.nan, "case", "ßd"])
+    result = s.str.casefold()
+
+    tm.assert_series_equal(result, expected)
+
+
 def test_casemethods(any_string_dtype):
     values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"]
     s = Series(values, dtype=any_string_dtype)
diff --git a/pandas/tests/strings/test_get_dummies.py b/pandas/tests/strings/test_get_dummies.py
new file mode 100644
index 0000000000000..31386e4e342ae
--- /dev/null
+++ b/pandas/tests/strings/test_get_dummies.py
@@ -0,0 +1,53 @@
+import numpy as np
+
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    _testing as tm,
+)
+
+
+def test_get_dummies(any_string_dtype):
+    s = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype)
+    result = s.str.get_dummies("|")
+    expected = DataFrame([[1, 1, 0], [1, 0, 1], [0, 0, 0]], columns=list("abc"))
+    tm.assert_frame_equal(result, expected)
+
+    s = Series(["a;b", "a", 7], dtype=any_string_dtype)
+    result = s.str.get_dummies(";")
+    expected = DataFrame([[0, 1, 1], [0, 1, 0], [1, 0, 0]], columns=list("7ab"))
+    tm.assert_frame_equal(result, expected)
+
+
+def test_get_dummies_index():
+    # GH9980, GH8028
+    idx = Index(["a|b", "a|c", "b|c"])
+    result = idx.str.get_dummies("|")
+
+    expected = MultiIndex.from_tuples(
+        [(1, 1, 0), (1, 0, 1), (0, 1, 1)], names=("a", "b", "c")
+    )
+    tm.assert_index_equal(result, expected)
+
+
+def test_get_dummies_with_name_dummy(any_string_dtype):
+    # GH 12180
+    # Dummies named 'name' should work as expected
+    s = Series(["a", "b,name", "b"], dtype=any_string_dtype)
+    result = s.str.get_dummies(",")
+    expected = DataFrame([[1, 0, 0], [0, 1, 1], [0, 1, 0]], columns=["a", "b", "name"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_get_dummies_with_name_dummy_index():
+    # GH 12180
+    # Dummies named 'name' should work as expected
+    idx = Index(["a|b", "name|c", "b|name"])
+    result = idx.str.get_dummies("|")
+
+    expected = MultiIndex.from_tuples(
+        [(1, 1, 0, 0), (0, 0, 1, 1), (0, 1, 0, 1)], names=("a", "b", "c", "name")
+    )
+    tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 80010de047cd5..42d81154dea0f 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -303,50 +303,6 @@ def test_isnumeric(any_string_dtype):
     tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e, dtype=dtype))
 
 
-def test_get_dummies(any_string_dtype):
-    s = Series(["a|b", "a|c", np.nan], dtype=any_string_dtype)
-    result = s.str.get_dummies("|")
-    expected = DataFrame([[1, 1, 0], [1, 0, 1], [0, 0, 0]], columns=list("abc"))
-    tm.assert_frame_equal(result, expected)
-
-    s = Series(["a;b", "a", 7], dtype=any_string_dtype)
-    result = s.str.get_dummies(";")
-    expected = DataFrame([[0, 1, 1], [0, 1, 0], [1, 0, 0]], columns=list("7ab"))
-    tm.assert_frame_equal(result, expected)
-
-
-def test_get_dummies_index():
-    # GH9980, GH8028
-    idx = Index(["a|b", "a|c", "b|c"])
-    result = idx.str.get_dummies("|")
-
-    expected = MultiIndex.from_tuples(
-        [(1, 1, 0), (1, 0, 1), (0, 1, 1)], names=("a", "b", "c")
-    )
-    tm.assert_index_equal(result, expected)
-
-
-def test_get_dummies_with_name_dummy(any_string_dtype):
-    # GH 12180
-    # Dummies named 'name' should work as expected
-    s = Series(["a", "b,name", "b"], dtype=any_string_dtype)
-    result = s.str.get_dummies(",")
-    expected = DataFrame([[1, 0, 0], [0, 1, 1], [0, 1, 0]], columns=["a", "b", "name"])
-    tm.assert_frame_equal(result, expected)
-
-
-def test_get_dummies_with_name_dummy_index():
-    # GH 12180
-    # Dummies named 'name' should work as expected
-    idx = Index(["a|b", "name|c", "b|name"])
-    result = idx.str.get_dummies("|")
-
-    expected = MultiIndex.from_tuples(
-        [(1, 1, 0, 0), (0, 0, 1, 1), (0, 1, 0, 1)], names=("a", "b", "c", "name")
-    )
-    tm.assert_index_equal(result, expected)
-
-
 def test_join():
     values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"])
     result = values.str.split("_").str.join("_")
@@ -782,15 +738,6 @@ def test_method_on_bytes():
         lhs.str.cat(rhs)
 
 
-def test_casefold():
-    # GH25405
-    expected = Series(["ss", np.nan, "case", "ssd"])
-    s = Series(["ß", np.nan, "case", "ßd"])
-    result = s.str.casefold()
-
-    tm.assert_series_equal(result, expected)
-
-
 def test_str_accessor_in_apply_func():
     # https://github.com/pandas-dev/pandas/issues/38979
     df = DataFrame(zip("abc", "def"))