From 369702c29839ac6fdfca195667978c05c0c6ccbe Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Thu, 20 May 2021 20:53:45 +0100
Subject: [PATCH 1/5] [ArrowStringArray] use
 pyarrow.compute.replace_substring(_regex) if available

---
 pandas/core/arrays/string_arrow.py          |  23 ++
 pandas/core/strings/accessor.py             |  64 +--
 pandas/core/strings/base.py                 |  20 +-
 pandas/core/strings/object_array.py         |  36 +-
 pandas/tests/series/methods/test_replace.py |  11 -
 pandas/tests/strings/test_find_replace.py   | 416 ++++++++++++--------
 6 files changed, 345 insertions(+), 225 deletions(-)

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index d5ee28eb7017e..0ad2df77165a7 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections.abc import Callable  # noqa: PDF001
 import re
 from typing import (
     TYPE_CHECKING,
@@ -834,6 +835,28 @@ def _str_endswith(self, pat: str, na=None):
         pat = re.escape(pat) + "$"
         return self._str_contains(pat, na=na, regex=True)
 
+    def _str_replace(
+        self,
+        pat: str | re.Pattern,
+        repl: str | Callable,
+        n: int = -1,
+        case: bool = True,
+        flags: int = 0,
+        regex: bool = True,
+    ):
+        if (
+            pa_version_under4p0
+            or isinstance(pat, re.Pattern)
+            or callable(repl)
+            or not case
+            or flags
+        ):
+            return super()._str_replace(pat, repl, n, case, flags, regex)
+
+        func = pc.replace_substring_regex if regex else pc.replace_substring
+        result = func(self._data, pattern=pat, replacement=repl, max_replacements=n)
+        return type(self)(result)
+
     def _str_match(
         self, pat: str, case: bool = True, flags: int = 0, na: Scalar = None
     ):
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 43df34a7ecbb2..f48b38d38c53b 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1,14 +1,12 @@
+from __future__ import annotations
+
 import codecs
+from collections.abc import Callable  # noqa: PDF001
 from functools import wraps
 import re
 from typing import (
     TYPE_CHECKING,
-    Dict,
     Hashable,
-    List,
-    Optional,
-    Pattern,
-    Union,
 )
 import warnings
 
@@ -43,7 +41,7 @@
 if TYPE_CHECKING:
     from pandas import Index
 
-_shared_docs: Dict[str, str] = {}
+_shared_docs: dict[str, str] = {}
 _cpython_optimized_encoders = (
     "utf-8",
     "utf8",
@@ -325,7 +323,7 @@ def cons_row(x):
         else:
             index = self._orig.index
             # This is a mess.
-            dtype: Optional[str]
+            dtype: str | None
             if self._is_string and returns_string:
                 dtype = self._orig.dtype
             else:
@@ -391,7 +389,7 @@ def _get_series_list(self, others):
                 or (isinstance(x, np.ndarray) and x.ndim == 1)
                 for x in others
             ):
-                los: List[Series] = []
+                los: list[Series] = []
                 while others:  # iterate through list and append each element
                     los = los + self._get_series_list(others.pop(0))
                 return los
@@ -1219,7 +1217,15 @@ def fullmatch(self, pat, case=True, flags=0, na=None):
         return self._wrap_result(result, fill_value=na, returns_string=False)
 
     @forbid_nonstring_types(["bytes"])
-    def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
+    def replace(
+        self,
+        pat: str | re.Pattern,
+        repl: str | Callable,
+        n: int = -1,
+        case: bool | None = None,
+        flags: int = 0,
+        regex: bool | None = None,
+    ):
         r"""
         Replace each occurrence of pattern/regex in the Series/Index.
 
@@ -1348,26 +1354,21 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
                 )
                 if len(pat) == 1:
                     msg += (
-                        " In addition, single character regular expressions will"
+                        " In addition, single character regular expressions will "
                         "*not* be treated as literal strings when regex=True."
                     )
                 warnings.warn(msg, FutureWarning, stacklevel=3)
-            regex = True
 
         # Check whether repl is valid (GH 13438, GH 15055)
         if not (isinstance(repl, str) or callable(repl)):
             raise TypeError("repl must be a string or callable")
 
         is_compiled_re = is_re(pat)
-        if regex:
-            if is_compiled_re:
-                if (case is not None) or (flags != 0):
-                    raise ValueError(
-                        "case and flags cannot be set when pat is a compiled regex"
-                    )
-            elif case is None:
-                # not a compiled regex, set default case
-                case = True
+        if regex or regex is None:
+            if is_compiled_re and (case is not None or flags != 0):
+                raise ValueError(
+                    "case and flags cannot be set when pat is a compiled regex"
+                )
 
         elif is_compiled_re:
             raise ValueError(
@@ -1376,6 +1377,15 @@ def replace(self, pat, repl, n=-1, case=None, flags=0, regex=None):
         elif callable(repl):
             raise ValueError("Cannot use a callable replacement when regex=False")
 
+        if regex is None:
+            if isinstance(pat, str) and len(pat) == 1:
+                regex = False
+            else:
+                regex = True
+
+        if case is None:
+            case = True
+
         result = self._data.array._str_replace(
             pat, repl, n=n, case=case, flags=flags, regex=regex
         )
@@ -2292,7 +2302,7 @@ def findall(self, pat, flags=0):
     @forbid_nonstring_types(["bytes"])
     def extract(
         self, pat: str, flags: int = 0, expand: bool = True
-    ) -> Union[FrameOrSeriesUnion, "Index"]:
+    ) -> FrameOrSeriesUnion | Index:
         r"""
         Extract capture groups in the regex `pat` as columns in a DataFrame.
 
@@ -2733,7 +2743,7 @@ def len(self):
     #   boolean:
     #     isalpha, isnumeric isalnum isdigit isdecimal isspace islower isupper istitle
     # _doc_args holds dict of strings to use in substituting casemethod docs
-    _doc_args: Dict[str, Dict[str, str]] = {}
+    _doc_args: dict[str, dict[str, str]] = {}
     _doc_args["lower"] = {"type": "lowercase", "method": "lower", "version": ""}
     _doc_args["upper"] = {"type": "uppercase", "method": "upper", "version": ""}
     _doc_args["title"] = {"type": "titlecase", "method": "title", "version": ""}
@@ -2971,7 +2981,7 @@ def casefold(self):
     )
 
 
-def cat_safe(list_of_columns: List, sep: str):
+def cat_safe(list_of_columns: list, sep: str):
     """
     Auxiliary function for :meth:`str.cat`.
 
@@ -3007,7 +3017,7 @@ def cat_safe(list_of_columns: List, sep: str):
     return result
 
 
-def cat_core(list_of_columns: List, sep: str):
+def cat_core(list_of_columns: list, sep: str):
     """
     Auxiliary function for :meth:`str.cat`
 
@@ -3046,14 +3056,14 @@ def _result_dtype(arr):
         return object
 
 
-def _get_single_group_name(regex: Pattern) -> Hashable:
+def _get_single_group_name(regex: re.Pattern) -> Hashable:
     if regex.groupindex:
         return next(iter(regex.groupindex))
     else:
         return None
 
 
-def _get_group_names(regex: Pattern) -> List[Hashable]:
+def _get_group_names(regex: re.Pattern) -> list[Hashable]:
     """
     Get named groups from compiled regex.
 
@@ -3119,7 +3129,7 @@ def str_extract(accessor: StringMethods, pat: str, flags: int = 0, expand: bool
         else:
             result_list = _str_extract(obj.array, pat, flags=flags, expand=returns_df)
 
-            result_index: Optional["Index"]
+            result_index: Index | None
             if isinstance(obj, ABCSeries):
                 result_index = obj.index
             else:
diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py
index a77f8861a7c02..730870b448cb2 100644
--- a/pandas/core/strings/base.py
+++ b/pandas/core/strings/base.py
@@ -1,8 +1,8 @@
+from __future__ import annotations
+
 import abc
-from typing import (
-    Pattern,
-    Union,
-)
+from collections.abc import Callable  # noqa: PDF001
+import re
 
 import numpy as np
 
@@ -52,7 +52,15 @@ def _str_endswith(self, pat, na=None):
         pass
 
     @abc.abstractmethod
-    def _str_replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
+    def _str_replace(
+        self,
+        pat: str | re.Pattern,
+        repl: str | Callable,
+        n: int = -1,
+        case: bool = True,
+        flags: int = 0,
+        regex: bool = True,
+    ):
         pass
 
     @abc.abstractmethod
@@ -68,7 +76,7 @@ def _str_match(
     @abc.abstractmethod
     def _str_fullmatch(
         self,
-        pat: Union[str, Pattern],
+        pat: str | re.Pattern,
         case: bool = True,
         flags: int = 0,
         na: Scalar = np.nan,
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 869eabc76b555..8505a88adc212 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -1,11 +1,8 @@
+from __future__ import annotations
+
+from collections.abc import Callable  # noqa: PDF001
 import re
 import textwrap
-from typing import (
-    Optional,
-    Pattern,
-    Set,
-    Union,
-)
 import unicodedata
 
 import numpy as np
@@ -18,10 +15,7 @@
     Scalar,
 )
 
-from pandas.core.dtypes.common import (
-    is_re,
-    is_scalar,
-)
+from pandas.core.dtypes.common import is_scalar
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.strings.base import BaseStringArrayMethods
@@ -38,7 +32,7 @@ def __len__(self):
         # For typing, _str_map relies on the object being sized.
         raise NotImplementedError
 
-    def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None):
+    def _str_map(self, f, na_value=None, dtype: Dtype | None = None):
         """
         Map a callable over valid element of the array.
 
@@ -138,15 +132,25 @@ def _str_endswith(self, pat, na=None):
         f = lambda x: x.endswith(pat)
         return self._str_map(f, na_value=na, dtype=np.dtype(bool))
 
-    def _str_replace(self, pat, repl, n=-1, case: bool = True, flags=0, regex=True):
-        is_compiled_re = is_re(pat)
-
+    def _str_replace(
+        self,
+        pat: str | re.Pattern,
+        repl: str | Callable,
+        n: int = -1,
+        case: bool = True,
+        flags: int = 0,
+        regex: bool = True,
+    ):
         if case is False:
             # add case flag, if provided
             flags |= re.IGNORECASE
 
-        if regex and (is_compiled_re or len(pat) > 1 or flags or callable(repl)):
-            if not is_compiled_re:
+        if regex or flags or callable(repl):
+            if not isinstance(pat, re.Pattern):
+                if regex is False:
+                    # regex=False means a literal pattern even though ``re`` is
+                    # used here (e.g. for flags): escape regex metacharacters
+                    pat = re.escape(pat)
                 pat = re.compile(pat, flags=flags)
 
             n = n if n >= 0 else 0
@@ -198,7 +198,7 @@ def _str_match(
 
     def _str_fullmatch(
         self,
-        pat: Union[str, Pattern],
+        pat: str | re.Pattern,
         case: bool = True,
         flags: int = 0,
         na: Scalar = None,
@@ -339,7 +339,7 @@ def _str_get_dummies(self, sep="|"):
         except TypeError:
             arr = sep + arr.astype(str) + sep
 
-        tags: Set[str] = set()
+        tags: set[str] = set()
         for ts in Series(arr).str.split(sep):
             tags.update(ts)
         tags2 = sorted(tags - {""})
diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py
index b21a2c54ae615..c32d74c17a47e 100644
--- a/pandas/tests/series/methods/test_replace.py
+++ b/pandas/tests/series/methods/test_replace.py
@@ -449,14 +449,3 @@ def test_replace_with_compiled_regex(self):
         result = s.replace({regex: "z"}, regex=True)
         expected = pd.Series(["z", "b", "c"])
         tm.assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize("pattern", ["^.$", "."])
-    def test_str_replace_regex_default_raises_warning(self, pattern):
-        # https://github.com/pandas-dev/pandas/pull/24809
-        s = pd.Series(["a", "b", "c"])
-        msg = r"The default value of regex will change from True to False"
-        if len(pattern) == 1:
-            msg += r".*single character regular expressions.*not.*literal strings"
-        with tm.assert_produces_warning(FutureWarning) as w:
-            s.str.replace(pattern, "")
-            assert re.match(msg, str(w[0].message))
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 0815d23f2b493..f9054b3de0a72 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -10,6 +10,10 @@
     _testing as tm,
 )
 
+# --------------------------------------------------------------------------------------
+# str.contains
+# --------------------------------------------------------------------------------------
+
 
 def test_contains(any_string_dtype):
     values = np.array(
@@ -148,6 +152,81 @@ def test_contains_na_kwarg_for_nullable_string_dtype(
     tm.assert_series_equal(result, expected)
 
 
+def test_contains_moar(any_string_dtype):
+    # PR #1179
+    s = Series(
+        ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
+        dtype=any_string_dtype,
+    )
+
+    result = s.str.contains("a")
+    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected = Series(
+        [False, False, False, True, True, False, np.nan, False, False, True],
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("a", case=False)
+    expected = Series(
+        [True, False, False, True, True, False, np.nan, True, False, True],
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("Aa")
+    expected = Series(
+        [False, False, False, True, False, False, np.nan, False, False, False],
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("ba")
+    expected = Series(
+        [False, False, False, True, False, False, np.nan, False, False, False],
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("ba", case=False)
+    expected = Series(
+        [False, False, False, True, True, False, np.nan, True, False, False],
+        dtype=expected_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+
+def test_contains_nan(any_string_dtype):
+    # PR #14171
+    s = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype)
+
+    result = s.str.contains("foo", na=False)
+    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
+    expected = Series([False, False, False], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("foo", na=True)
+    expected = Series([True, True, True], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("foo", na="foo")
+    if any_string_dtype == "object":
+        expected = Series(["foo", "foo", "foo"], dtype=np.object_)
+    else:
+        expected = Series([True, True, True], dtype="boolean")
+    tm.assert_series_equal(result, expected)
+
+    result = s.str.contains("foo")
+    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
+    expected = Series([np.nan, np.nan, np.nan], dtype=expected_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+# --------------------------------------------------------------------------------------
+# str.startswith
+# --------------------------------------------------------------------------------------
+
+
 @pytest.mark.parametrize("dtype", [None, "category"])
 @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
 @pytest.mark.parametrize("na", [True, False])
@@ -195,6 +274,11 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na):
     tm.assert_series_equal(result, exp)
 
 
+# --------------------------------------------------------------------------------------
+# str.endswith
+# --------------------------------------------------------------------------------------
+
+
 @pytest.mark.parametrize("dtype", [None, "category"])
 @pytest.mark.parametrize("null_value", [None, np.nan, pd.NA])
 @pytest.mark.parametrize("na", [True, False])
@@ -242,39 +326,50 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na):
     tm.assert_series_equal(result, exp)
 
 
+# --------------------------------------------------------------------------------------
+# str.replace
+# --------------------------------------------------------------------------------------
+
+
 def test_replace(any_string_dtype):
-    values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
 
-    result = values.str.replace("BAD[_]*", "", regex=True)
+    result = ser.str.replace("BAD[_]*", "", regex=True)
     expected = Series(["foobar", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
-    result = values.str.replace("BAD[_]*", "", n=1, regex=True)
+
+def test_replace_max_replacements(any_string_dtype):
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+
     expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
+    result = ser.str.replace("BAD[_]*", "", n=1, regex=True)
+    tm.assert_series_equal(result, expected)
+
+    expected = Series(["foo__barBAD", np.nan], dtype=any_string_dtype)
+    result = ser.str.replace("BAD", "", n=1, regex=False)
     tm.assert_series_equal(result, expected)
 
 
 def test_replace_mixed_object():
-    mixed = Series(
+    ser = Series(
         ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0]
     )
-
-    result = Series(mixed).str.replace("BAD[_]*", "", regex=True)
+    result = Series(ser).str.replace("BAD[_]*", "", regex=True)
     expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan])
-    assert isinstance(result, Series)
-    tm.assert_almost_equal(result, expected)
+    tm.assert_series_equal(result, expected)
 
 
 def test_replace_unicode(any_string_dtype):
-    values = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
+    ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
     expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
-    result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
+    result = ser.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE, regex=True)
     tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize("repl", [None, 3, {"a": "b"}])
 @pytest.mark.parametrize("data", [["a", "b", None], ["a", "b", "c", "ad"]])
-def test_replace_raises(any_string_dtype, index_or_series, repl, data):
+def test_replace_wrong_repl_type_raises(any_string_dtype, index_or_series, repl, data):
     # https://github.com/pandas-dev/pandas/issues/13438
     msg = "repl must be a string or callable"
     obj = index_or_series(data, dtype=any_string_dtype)
@@ -284,11 +379,11 @@ def test_replace_raises(any_string_dtype, index_or_series, repl, data):
 
 def test_replace_callable(any_string_dtype):
     # GH 15055
-    values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
 
     # test with callable
     repl = lambda m: m.group(0).swapcase()
-    result = values.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
+    result = ser.str.replace("[a-z][A-Z]{2}", repl, n=2, regex=True)
     expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
@@ -311,100 +406,193 @@ def test_replace_callable_raises(any_string_dtype, repl):
 
 def test_replace_callable_named_groups(any_string_dtype):
     # test regex named groups
-    values = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
+    ser = Series(["Foo Bar Baz", np.nan], dtype=any_string_dtype)
     pat = r"(?P<first>\w+) (?P<middle>\w+) (?P<last>\w+)"
     repl = lambda m: m.group("middle").swapcase()
-    result = values.str.replace(pat, repl, regex=True)
+    result = ser.str.replace(pat, repl, regex=True)
     expected = Series(["bAR", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
 
 def test_replace_compiled_regex(any_string_dtype):
     # GH 15446
-    values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
 
     # test with compiled regex
     pat = re.compile(r"BAD_*")
-    result = values.str.replace(pat, "", regex=True)
+    result = ser.str.replace(pat, "", regex=True)
     expected = Series(["foobar", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
-    result = values.str.replace(pat, "", n=1, regex=True)
+    result = ser.str.replace(pat, "", n=1, regex=True)
     expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
 
 def test_replace_compiled_regex_mixed_object():
     pat = re.compile(r"BAD_*")
-    mixed = Series(
+    ser = Series(
         ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0]
     )
-
-    result = Series(mixed).str.replace(pat, "", regex=True)
+    result = Series(ser).str.replace(pat, "", regex=True)
     expected = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan])
-    assert isinstance(result, Series)
-    tm.assert_almost_equal(result, expected)
+    tm.assert_series_equal(result, expected)
 
 
 def test_replace_compiled_regex_unicode(any_string_dtype):
-    values = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
+    ser = Series([b"abcd,\xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
     expected = Series([b"abcd, \xc3\xa0".decode("utf-8")], dtype=any_string_dtype)
     pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE)
-    result = values.str.replace(pat, ", ")
+    result = ser.str.replace(pat, ", ")
     tm.assert_series_equal(result, expected)
 
 
 def test_replace_compiled_regex_raises(any_string_dtype):
     # case and flags provided to str.replace will have no effect
     # and will produce warnings
-    values = Series(["fooBAD__barBAD__bad", np.nan], dtype=any_string_dtype)
+    ser = Series(["fooBAD__barBAD__bad", np.nan], dtype=any_string_dtype)
     pat = re.compile(r"BAD_*")
 
     msg = "case and flags cannot be set when pat is a compiled regex"
 
     with pytest.raises(ValueError, match=msg):
-        values.str.replace(pat, "", flags=re.IGNORECASE)
+        ser.str.replace(pat, "", flags=re.IGNORECASE)
 
     with pytest.raises(ValueError, match=msg):
-        values.str.replace(pat, "", case=False)
+        ser.str.replace(pat, "", case=False)
 
     with pytest.raises(ValueError, match=msg):
-        values.str.replace(pat, "", case=True)
+        ser.str.replace(pat, "", case=True)
 
 
 def test_replace_compiled_regex_callable(any_string_dtype):
     # test with callable
-    values = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
+    ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype)
     repl = lambda m: m.group(0).swapcase()
     pat = re.compile("[a-z][A-Z]{2}")
-    result = values.str.replace(pat, repl, n=2)
+    result = ser.str.replace(pat, repl, n=2)
     expected = Series(["foObaD__baRbaD", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
 
-def test_replace_literal(any_string_dtype):
+@pytest.mark.parametrize(
+    "regex,expected", [(True, ["bao", "bao", np.nan]), (False, ["bao", "foo", np.nan])]
+)
+def test_replace_literal(regex, expected, any_string_dtype):
     # GH16808 literal replace (regex=False vs regex=True)
-    values = Series(["f.o", "foo", np.nan], dtype=any_string_dtype)
-    expected = Series(["bao", "bao", np.nan], dtype=any_string_dtype)
-    result = values.str.replace("f.", "ba", regex=True)
+    ser = Series(["f.o", "foo", np.nan], dtype=any_string_dtype)
+    expected = Series(expected, dtype=any_string_dtype)
+    result = ser.str.replace("f.", "ba", regex=regex)
     tm.assert_series_equal(result, expected)
 
-    expected = Series(["bao", "foo", np.nan], dtype=any_string_dtype)
-    result = values.str.replace("f.", "ba", regex=False)
-    tm.assert_series_equal(result, expected)
 
-    # Cannot do a literal replace if given a callable repl or compiled
-    # pattern
-    callable_repl = lambda m: m.group(0).swapcase()
-    compiled_pat = re.compile("[a-z][A-Z]{2}")
+def test_replace_literal_callable_raises(any_string_dtype):
+    ser = Series([], dtype=any_string_dtype)
+    repl = lambda m: m.group(0).swapcase()
 
     msg = "Cannot use a callable replacement when regex=False"
     with pytest.raises(ValueError, match=msg):
-        values.str.replace("abc", callable_repl, regex=False)
+        ser.str.replace("abc", repl, regex=False)
+
+
+def test_replace_literal_compiled_raises(any_string_dtype):
+    ser = Series([], dtype=any_string_dtype)
+    pat = re.compile("[a-z][A-Z]{2}")
 
     msg = "Cannot use a compiled regex as replacement pattern with regex=False"
     with pytest.raises(ValueError, match=msg):
-        values.str.replace(compiled_pat, "", regex=False)
+        ser.str.replace(pat, "", regex=False)
+
+
+def test_replace_moar(any_string_dtype):
+    # PR #1179
+    ser = Series(
+        ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
+        dtype=any_string_dtype,
+    )
+
+    result = ser.str.replace("A", "YYY")
+    expected = Series(
+        ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"],
+        dtype=any_string_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = ser.str.replace("A", "YYY", case=False)
+    expected = Series(
+        [
+            "YYY",
+            "B",
+            "C",
+            "YYYYYYbYYY",
+            "BYYYcYYY",
+            "",
+            np.nan,
+            "CYYYBYYY",
+            "dog",
+            "cYYYt",
+        ],
+        dtype=any_string_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+    result = ser.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
+    expected = Series(
+        [
+            "A",
+            "B",
+            "C",
+            "XX-XX ba",
+            "XX-XX ca",
+            "",
+            np.nan,
+            "XX-XX BA",
+            "XX-XX ",
+            "XX-XX t",
+        ],
+        dtype=any_string_dtype,
+    )
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("regex", [True, False])
+def test_replace_not_case_sensitive(regex, any_string_dtype):
+    ser = Series(["A", "a", np.nan], dtype=any_string_dtype)
+    result = ser.str.replace("A", "A", case=False, regex=regex)
+    expected = Series(["A", "A", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_regex_default_warning(any_string_dtype):
+    # https://github.com/pandas-dev/pandas/pull/24809
+    s = Series(["a", "b", "ac", np.nan, ""], dtype=any_string_dtype)
+    msg = (
+        "The default value of regex will change from True to False in a "
+        "future version\\.$"
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = s.str.replace("^.$", "a")
+    expected = Series(["a", "a", "ac", np.nan, ""], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_replace_regex_default_warning_single_character(any_string_dtype):
+    # https://github.com/pandas-dev/pandas/pull/24809
+    s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype)
+    msg = re.escape(
+        "The default value of regex will change from True to False in a "
+        "future version. In addition, single character regular expressions will *not* "
+        "be treated as literal strings when regex=True."
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = s.str.replace(".", "a")
+    expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+
+# --------------------------------------------------------------------------------------
+# str.match
+# --------------------------------------------------------------------------------------
 
 
 def test_match(any_string_dtype):
@@ -484,6 +672,11 @@ def test_match_case_kwarg(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
+# --------------------------------------------------------------------------------------
+# str.fullmatch
+# --------------------------------------------------------------------------------------
+
+
 def test_fullmatch(any_string_dtype):
     # GH 32806
     ser = Series(
@@ -523,6 +716,11 @@ def test_fullmatch_case_kwarg(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
+# --------------------------------------------------------------------------------------
+# str.findall
+# --------------------------------------------------------------------------------------
+
+
 def test_findall(any_string_dtype):
     ser = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"], dtype=any_string_dtype)
     result = ser.str.findall("BAD[_]*")
@@ -563,6 +761,11 @@ def test_findall_mixed_object():
     tm.assert_series_equal(result, expected)
 
 
+# --------------------------------------------------------------------------------------
+# str.find
+# --------------------------------------------------------------------------------------
+
+
 def test_find(any_string_dtype):
     ser = Series(
         ["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF", "XXXX"], dtype=any_string_dtype
@@ -646,6 +849,11 @@ def test_find_nan(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
+# --------------------------------------------------------------------------------------
+# str.translate
+# --------------------------------------------------------------------------------------
+
+
 def test_translate(index_or_series, any_string_dtype):
     obj = index_or_series(
         ["abcdefg", "abcc", "cdddfg", "cdefggg"], dtype=any_string_dtype
@@ -670,125 +878,7 @@ def test_translate_mixed_object():
     tm.assert_series_equal(result, expected)
 
 
-def test_contains_moar(any_string_dtype):
-    # PR #1179
-    s = Series(
-        ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
-        dtype=any_string_dtype,
-    )
-
-    result = s.str.contains("a")
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
-    expected = Series(
-        [False, False, False, True, True, False, np.nan, False, False, True],
-        dtype=expected_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("a", case=False)
-    expected = Series(
-        [True, False, False, True, True, False, np.nan, True, False, True],
-        dtype=expected_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("Aa")
-    expected = Series(
-        [False, False, False, True, False, False, np.nan, False, False, False],
-        dtype=expected_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("ba")
-    expected = Series(
-        [False, False, False, True, False, False, np.nan, False, False, False],
-        dtype=expected_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("ba", case=False)
-    expected = Series(
-        [False, False, False, True, True, False, np.nan, True, False, False],
-        dtype=expected_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-
-def test_contains_nan(any_string_dtype):
-    # PR #14171
-    s = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype)
-
-    result = s.str.contains("foo", na=False)
-    expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean"
-    expected = Series([False, False, False], dtype=expected_dtype)
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("foo", na=True)
-    expected = Series([True, True, True], dtype=expected_dtype)
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("foo", na="foo")
-    if any_string_dtype == "object":
-        expected = Series(["foo", "foo", "foo"], dtype=np.object_)
-    else:
-        expected = Series([True, True, True], dtype="boolean")
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.contains("foo")
-    expected_dtype = "object" if any_string_dtype == "object" else "boolean"
-    expected = Series([np.nan, np.nan, np.nan], dtype=expected_dtype)
-    tm.assert_series_equal(result, expected)
-
-
-def test_replace_moar(any_string_dtype):
-    # PR #1179
-    s = Series(
-        ["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"],
-        dtype=any_string_dtype,
-    )
-
-    result = s.str.replace("A", "YYY")
-    expected = Series(
-        ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"],
-        dtype=any_string_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.replace("A", "YYY", case=False)
-    expected = Series(
-        [
-            "YYY",
-            "B",
-            "C",
-            "YYYYYYbYYY",
-            "BYYYcYYY",
-            "",
-            np.nan,
-            "CYYYBYYY",
-            "dog",
-            "cYYYt",
-        ],
-        dtype=any_string_dtype,
-    )
-    tm.assert_series_equal(result, expected)
-
-    result = s.str.replace("^.a|dog", "XX-XX ", case=False, regex=True)
-    expected = Series(
-        [
-            "A",
-            "B",
-            "C",
-            "XX-XX ba",
-            "XX-XX ca",
-            "",
-            np.nan,
-            "XX-XX BA",
-            "XX-XX ",
-            "XX-XX t",
-        ],
-        dtype=any_string_dtype,
-    )
-    tm.assert_series_equal(result, expected)
+# --------------------------------------------------------------------------------------
 
 
 def test_flags_kwarg(any_string_dtype):

From 19ee363dc4b4301ea73c1cf50462fea3aae62d5c Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 21 May 2021 11:55:12 +0100
Subject: [PATCH 2/5] parametrize test_replace_regex_single_character

---
 pandas/tests/strings/test_find_replace.py | 26 ++++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index f9054b3de0a72..80be19438f39d 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -576,16 +576,26 @@ def test_replace_regex_default_warning(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
-def test_replace_regex_default_warning_single_character(any_string_dtype):
+@pytest.mark.parametrize("regex", [True, False, None])
+def test_replace_regex_single_character(regex, any_string_dtype):
     # https://github.com/pandas-dev/pandas/pull/24809
+
+    # The current behavior is to treat single character patterns as literal strings,
+    # even when ``regex`` is set to ``True``.
+
     s = Series(["a.b", ".", "b", np.nan, ""], dtype=any_string_dtype)
-    msg = re.escape(
-        "The default value of regex will change from True to False in a "
-        "future version. In addition, single character regular expressions will *not* "
-        "be treated as literal strings when regex=True."
-    )
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        result = s.str.replace(".", "a")
+
+    if regex is None:
+        msg = re.escape(
+            "The default value of regex will change from True to False in a future "
+            "version. In addition, single character regular expressions will *not* "
+            "be treated as literal strings when regex=True."
+        )
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = s.str.replace(".", "a", regex=regex)
+    else:
+        result = s.str.replace(".", "a", regex=regex)
+
     expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 

From fa666c24e849e2545e0fa4bf9fa3fa67f8ba9928 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 21 May 2021 12:01:32 +0100
Subject: [PATCH 3/5] undo bugfix that's not supposed to be fixed yet and add
 test

---
 pandas/core/strings/accessor.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index f48b38d38c53b..ca0067b77ee23 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1377,11 +1377,13 @@ def replace(
         elif callable(repl):
             raise ValueError("Cannot use a callable replacement when regex=False")
 
+        # For now, single character patterns are still treated as literal strings, even
+        # when ``regex`` is ``True``; this is slated to change in a future version.
+        if isinstance(pat, str) and len(pat) == 1:
+            regex = False
+
         if regex is None:
-            if isinstance(pat, str) and len(pat) == 1:
-                regex = False
-            else:
-                regex = True
+            regex = True
 
         if case is None:
             case = True

From 2505ca41945820307ade2b6d6ccaaae626fa8f36 Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 21 May 2021 13:00:03 +0100
Subject: [PATCH 4/5] fix case flag with regex=False

---
 pandas/core/strings/object_array.py       |  2 ++
 pandas/tests/strings/test_find_replace.py | 14 +++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index 8505a88adc212..c214ada9c1ada 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -147,6 +147,8 @@ def _str_replace(
 
         if regex or flags or callable(repl):
             if not isinstance(pat, re.Pattern):
+                if regex is False:
+                    pat = re.escape(pat)
                 pat = re.compile(pat, flags=flags)
 
             n = n if n >= 0 else 0
diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index 80be19438f39d..ecc15c8b32004 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -555,11 +555,15 @@ def test_replace_moar(any_string_dtype):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("regex", [True, False])
-def test_replace_not_case_sensitive(regex, any_string_dtype):
-    ser = Series(["A", "a", np.nan], dtype=any_string_dtype)
-    result = ser.str.replace("A", "A", case=False, regex=regex)
-    expected = Series(["A", "A", np.nan], dtype=any_string_dtype)
+def test_replace_not_case_sensitive_not_regex(any_string_dtype):
+    ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)
+
+    result = ser.str.replace("a", "c", case=False, regex=False)
+    expected = Series(["c.", "c.", "cb", "cb", np.nan], dtype=any_string_dtype)
+    tm.assert_series_equal(result, expected)
+
+    result = ser.str.replace("a.", "c.", case=False, regex=False)
+    expected = Series(["c.", "c.", "Ab", "ab", np.nan], dtype=any_string_dtype)
     tm.assert_series_equal(result, expected)
 
 

From 26bd1efb43d22d078995f91926cfc8eaac8225fa Mon Sep 17 00:00:00 2001
From: Simon Hawkins <simonjayhawkins@gmail.com>
Date: Fri, 21 May 2021 13:16:21 +0100
Subject: [PATCH 5/5] add issue number

---
 pandas/tests/strings/test_find_replace.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py
index ecc15c8b32004..391c71e57399a 100644
--- a/pandas/tests/strings/test_find_replace.py
+++ b/pandas/tests/strings/test_find_replace.py
@@ -556,6 +556,7 @@ def test_replace_moar(any_string_dtype):
 
 
 def test_replace_not_case_sensitive_not_regex(any_string_dtype):
+    # https://github.com/pandas-dev/pandas/issues/41602
     ser = Series(["A.", "a.", "Ab", "ab", np.nan], dtype=any_string_dtype)
 
     result = ser.str.replace("a", "c", case=False, regex=False)