From 59aad8d43ce1328e98964d7b610ec8b76c92d344 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 5 Mar 2019 13:40:57 +0100 Subject: [PATCH 1/2] bpo-29571: Fix test_re.test_locale_flag() Use locale.getpreferredencoding() rather than locale.getlocale() to get the locale encoding. With some locales, locale.getlocale() returns the wrong encoding. For example, on Fedora 29, locale.getlocale() returns ISO-8859-1 encoding for the "en_IN" locale, whereas locale.getpreferredencoding() reports the correct encoding: UTF-8. On Windows, temporarily set the LC_CTYPE locale to the user preferred encoding to ensure that it uses the ANSI code page, to be consistent with locale.getpreferredencoding(). --- Lib/test/test_re.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0b710e3766ab82..5ef6d7b12c50a3 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -1516,8 +1516,18 @@ def test_ascii_and_unicode_flag(self): self.assertRaises(re.error, re.compile, r'(?au)\w') def test_locale_flag(self): - import locale - _, enc = locale.getlocale(locale.LC_CTYPE) + # On Windows, Python 3.7 doesn't call setlocale(LC_CTYPE, "") at + # startup and so the LC_CTYPE locale uses Latin1 encoding by default, + # whereas getpreferredencoding() returns the ANSI code page. + # Temporarily set the LC_CTYPE locale to the user preferred encoding to + # ensure that it uses the ANSI code page. 
+ oldloc = locale.setlocale(locale.LC_CTYPE, None) + locale.setlocale(locale.LC_CTYPE, "") + self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldloc) + + # Get the current locale encoding + enc = locale.getpreferredencoding(False) + # Search non-ASCII letter for i in range(128, 256): try: From 6368e30f8654569c0a325caca97019a4d40bf8b5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 5 Mar 2019 13:48:41 +0100 Subject: [PATCH 2/2] Add NEWS entry --- .../next/Tests/2019-03-05-13-48-39.bpo-29571.ecGuKR.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Tests/2019-03-05-13-48-39.bpo-29571.ecGuKR.rst diff --git a/Misc/NEWS.d/next/Tests/2019-03-05-13-48-39.bpo-29571.ecGuKR.rst b/Misc/NEWS.d/next/Tests/2019-03-05-13-48-39.bpo-29571.ecGuKR.rst new file mode 100644 index 00000000000000..f89aec5e8d52dc --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2019-03-05-13-48-39.bpo-29571.ecGuKR.rst @@ -0,0 +1,6 @@ +Fix ``test_re.test_locale_flag()``: use ``locale.getpreferredencoding()`` +rather than ``locale.getlocale()`` to get the locale encoding. With some +locales, ``locale.getlocale()`` returns the wrong encoding. On Windows, +temporarily set the ``LC_CTYPE`` locale to the user preferred encoding to +ensure that it uses the ANSI code page, to be consistent with +``locale.getpreferredencoding()``.