From 7bbdfdee93afb1ac545e6484b13e4b0de8f3d621 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Thu, 6 May 2021 10:59:14 -0400 Subject: [PATCH] TST/REF: split out replace regex into class --- pandas/tests/frame/methods/test_replace.py | 424 +++++++++++---------- 1 file changed, 213 insertions(+), 211 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index e6ed60dc2bb08..d2974a5d08a60 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -57,217 +57,6 @@ def test_replace_inplace(self, datetime_frame, float_string_frame): assert return_value is None tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) - def test_regex_replace_scalar(self, mix_ab): - obj = {"a": list("ab.."), "b": list("efgh")} - dfobj = DataFrame(obj) - dfmix = DataFrame(mix_ab) - - # simplest cases - # regex -> value - # obj frame - res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - def test_regex_replace_scalar_inplace(self, mix_ab): - obj = {"a": list("ab.."), "b": list("efgh")} - dfobj = DataFrame(obj) - dfmix = DataFrame(mix_ab) - - # simplest cases - # regex -> value - # obj frame - res = dfobj.copy() - return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.copy() - return_value = res.replace( - re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace( - re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace( - re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True - ) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace( - re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True - ) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfobj.copy() - return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.copy() - return_value = res.replace( - regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace( - regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace( - regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True - ) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace( - regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True - ) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - def test_regex_replace_list_obj(self): obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} dfobj = DataFrame(obj) @@ -1689,3 +1478,216 @@ def test_replace_bytes(self, frame_or_series): expected = obj.copy() obj = obj.replace({None: np.nan}) tm.assert_equal(obj, expected) + + +class TestDataFrameReplaceRegex: + def test_regex_replace_scalar(self, mix_ab): + obj = {"a": list("ab.."), "b": list("efgh")} + dfobj = DataFrame(obj) + dfmix = DataFrame(mix_ab) + + # simplest cases + # regex -> value + # obj frame + res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + # everything with compiled regexs as well + res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_scalar_inplace(self, mix_ab): + obj = {"a": list("ab.."), "b": list("efgh")} + dfobj = DataFrame(obj) + dfmix = DataFrame(mix_ab) + + # simplest cases + # regex -> value + # obj frame + res = dfobj.copy() + return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) + assert return_value is None + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) + assert return_value is None + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) + assert return_value is None + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) + assert return_value is None + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + # everything with compiled regexs as well + res = dfobj.copy() + return_value = res.replace( + re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True + ) + assert return_value is None + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + return_value = res.replace( + re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True + ) + assert return_value is None + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + return_value = res.replace( + re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True + ) + assert return_value is None + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + return_value = res.replace( + re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True + ) + assert return_value is None + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + res = dfobj.copy() + return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) + assert return_value is None + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) + assert return_value is None + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) + assert return_value is None + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + # everything with compiled regexs as well + res = dfobj.copy() + return_value = res.replace( + regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True + ) + assert return_value is None + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + return_value = res.replace( + regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True + ) + assert return_value is None + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + return_value = res.replace( + regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True + ) + assert return_value is None + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + return_value = res.replace( + regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True + ) + assert return_value is None + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec)