Skip to content

Commit c5b049b

Browse files
authored
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
1 parent b4d8953 commit c5b049b

File tree

4 files changed

+21
-2
lines changed

4 files changed

+21
-2
lines changed

Doc/whatsnew/3.10.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
186 186
support is provided by the underlying ncurses library.
187 187
(Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
188 188

189+
encodings
190+
---------
191+
:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
192+
(Contributed by Hai Shi in :issue:`39337`.)
193+
189 194
glob
190 195
----
191 196

Lib/encodings/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
61 61
if c.isalnum() or c == '.':
62 62
if punct and chars:
63 63
chars.append('_')
64-
chars.append(c)
64+
if c.isascii():
65+
chars.append(c)
65 66
punct = False
66 67
else:
67 68
punct = True

Lib/test/test_codecs.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3417,7 +3417,7 @@ def test_rot13_func(self):
3417 3417

3418 3418
class CodecNameNormalizationTest(unittest.TestCase):
3419 3419
"""Test codec name normalization"""
3420-
def test_normalized_encoding(self):
3420+
def test_codecs_lookup(self):
3421 3421
FOUND = (1, 2, 3, 4)
3422 3422
NOT_FOUND = (None, None, None, None)
3423 3423
def search_function(encoding):
@@ -3439,6 +3439,18 @@ def search_function(encoding):
3439 3439
self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
3440 3440
self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
3441 3441

3442+
def test_encodings_normalize_encoding(self):
3443+
# encodings.normalize_encoding() ignores non-ASCII characters.
3444+
normalize = encodings.normalize_encoding
3445+
self.assertEqual(normalize('utf_8'), 'utf_8')
3446+
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
3447+
self.assertEqual(normalize('utf 8'), 'utf_8')
3448+
# encodings.normalize_encoding() doesn't convert
3449+
# characters to lower case.
3450+
self.assertEqual(normalize('UTF 8'), 'UTF_8')
3451+
self.assertEqual(normalize('utf.8'), 'utf.8')
3452+
self.assertEqual(normalize('utf...8'), 'utf...8')
3453+
3442 3454

3443 3455
if __name__ == "__main__":
3444 3456
unittest.main()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:func:`encodings.normalize_encoding` now ignores non-ASCII characters.

0 commit comments

Comments
 (0)