From 05ae5adfc4943b9f6908385afa2240896e2d1c49 Mon Sep 17 00:00:00 2001 From: James Seo Date: Wed, 8 Mar 2023 07:59:43 -0800 Subject: [PATCH 01/12] Add Ascii85, base85, and Z85 support to binascii Add Ascii85, base85, and Z85 encoders and decoders to `binascii`, replacing the existing pure Python implementations in `base64`. No API or documentation changes are necessary with respect to `base64.a85encode()`, `b85encode()`, etc., and all existing unit tests for those functions continue to pass without modification. Note that attempting to decode Ascii85 or base85 data of length 1 mod 5 (after accounting for Ascii85 quirks) now produces an error, as no encoder would emit such data. This should be the only significant externally visible difference compared to the old implementation. Resolves: gh-101178 --- Doc/library/binascii.rst | 73 +++ .../pycore_global_objects_fini_generated.h | 5 + Include/internal/pycore_global_strings.h | 5 + .../internal/pycore_runtime_init_generated.h | 5 + .../internal/pycore_unicodeobject_generated.h | 20 + Lib/base64.py | 179 +----- Lib/test/test_binascii.py | 337 ++++++++++- ...-04-20-21-54-51.gh-issue-101178.4k2Sib.rst | 2 + Modules/binascii.c | 541 ++++++++++++++++++ Modules/clinic/binascii.c.h | 419 +++++++++++++- 10 files changed, 1412 insertions(+), 174 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 1bab785684bbab..6e4dd23c8c62b0 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -77,6 +77,79 @@ The :mod:`binascii` module defines the following functions: Added the *newline* parameter. +.. function:: a2b_ascii85(string, /, *, fold_spaces=False, wrap=False, ignore=b"") + + Convert Ascii85 data back to binary and return the binary data. 
+ + Valid Ascii85 data contains characters from the Ascii85 alphabet in groups + of five (except for the final group, which may have from two to five + characters). Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is + accepted as a short form of the group ``!!!!!``, which encodes four + consecutive null bytes. + + If *fold_spaces* is true, the special character ``y`` is also accepted as a + short form of the group ``+<VdL``, which encodes four consecutive spaces. + + If *wrap* is true, the input is expected to be wrapped in ``<~`` and ``~>``, as in + the Adobe Ascii85 format. + + *ignore* is an optional bytes-like object that specifies characters to + ignore in the input. + + Invalid Ascii85 data will raise :exc:`binascii.Error`. + + +.. function:: b2a_ascii85(data, /, *, fold_spaces=False, wrap=False, width=0, pad=False) + + Convert binary data to a formatted sequence of ASCII characters in Ascii85 + coding. The return value is the converted data. + + If *fold_spaces* is true, four consecutive spaces are encoded as the + special character ``y`` instead of the sequence ``+<VdL``. + + If *wrap* is true, the output is wrapped in ``<~`` and ``~>``, as + in the Adobe Ascii85 format. + + If *width* is provided and greater than 0, the output is split into lines + of no more than the specified width separated by the ASCII newline + character. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + +.. function:: a2b_base85(string, /, *, strict_mode=False, z85=False) + + Convert base85 data back to binary and return the binary data. + More than one line may be passed at a time. + + If *strict_mode* is true, only valid base85 data will be converted. + Invalid base85 data will raise :exc:`binascii.Error`. + + If *z85* is true, the base85 data uses the Z85 alphabet. + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + + Valid base85 data contains characters from the base85 alphabet in groups + of five (except for the final group, which may have from two to five + characters).
Each group encodes 32 bits of binary data in the range from + ``0`` to ``2 ** 32 - 1``, inclusive. + + +.. function:: b2a_base85(data, /, *, pad=False, newline=True, z85=False) + + Convert binary data to a line of ASCII characters in base85 coding. + The return value is the converted line. + + If *pad* is true, the input is padded to a multiple of 4 before encoding. + + If *newline* is true, a newline char is appended to the result. + + If *z85* is true, the Z85 alphabet is used for conversion. + See `Z85 specification <https://rfc.zeromq.org/spec/32/>`_ for more information. + + .. function:: a2b_qp(data, header=False) Convert a block of quoted-printable data back to binary and return the binary diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 5485d0bd64f3f1..0bb6729eeabbe1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -963,6 +963,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold_spaces)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format_spec)); @@ -1145,6 +1146,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outpath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pad)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password)); @@ -1301,11 +1303,14 @@
_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(weekday)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(which)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(who)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(width)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(withdata)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wrap)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(writable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(year)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(z85)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zdict)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[0]); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[1]); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 3ce192511e3879..f178ee5eb3ffe6 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -454,6 +454,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(flags) STRUCT_FOR_ID(flush) STRUCT_FOR_ID(fold) + STRUCT_FOR_ID(fold_spaces) STRUCT_FOR_ID(follow_symlinks) STRUCT_FOR_ID(format) STRUCT_FOR_ID(format_spec) @@ -636,6 +637,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(outpath) STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) + STRUCT_FOR_ID(pad) STRUCT_FOR_ID(pages) STRUCT_FOR_ID(parent) STRUCT_FOR_ID(password) @@ -792,11 +794,14 @@ struct _Py_global_strings { STRUCT_FOR_ID(weekday) STRUCT_FOR_ID(which) STRUCT_FOR_ID(who) + STRUCT_FOR_ID(width) STRUCT_FOR_ID(withdata) + STRUCT_FOR_ID(wrap) STRUCT_FOR_ID(writable) STRUCT_FOR_ID(write) STRUCT_FOR_ID(write_through) STRUCT_FOR_ID(year) + STRUCT_FOR_ID(z85) STRUCT_FOR_ID(zdict) } identifiers; struct { diff --git a/Include/internal/pycore_runtime_init_generated.h 
b/Include/internal/pycore_runtime_init_generated.h index 5c95d0feddecba..0046d48ff9215a 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -961,6 +961,7 @@ extern "C" { INIT_ID(flags), \ INIT_ID(flush), \ INIT_ID(fold), \ + INIT_ID(fold_spaces), \ INIT_ID(follow_symlinks), \ INIT_ID(format), \ INIT_ID(format_spec), \ @@ -1143,6 +1144,7 @@ extern "C" { INIT_ID(outpath), \ INIT_ID(overlapped), \ INIT_ID(owner), \ + INIT_ID(pad), \ INIT_ID(pages), \ INIT_ID(parent), \ INIT_ID(password), \ @@ -1299,11 +1301,14 @@ extern "C" { INIT_ID(weekday), \ INIT_ID(which), \ INIT_ID(who), \ + INIT_ID(width), \ INIT_ID(withdata), \ + INIT_ID(wrap), \ INIT_ID(writable), \ INIT_ID(write), \ INIT_ID(write_through), \ INIT_ID(year), \ + INIT_ID(z85), \ INIT_ID(zdict), \ } diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index a1fc9736d66618..6bd14ec858f7d6 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1604,6 +1604,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(fold_spaces); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(follow_symlinks); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2332,6 +2336,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pad); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string 
= &_Py_ID(pages); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2956,10 +2964,18 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(width); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(withdata); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(wrap); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(writable); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2976,6 +2992,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(z85); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(zdict); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..20542eeb447c55 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -290,36 +290,6 @@ def b16decode(s, casefold=False): # # Ascii85 encoding/decoding # - -_a85chars = None -_a85chars2 = None -_A85START = b"<~" -_A85END = b"~>" - -def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): - # Helper function for a85encode and b85encode - if not isinstance(b, bytes_types): - b = memoryview(b).tobytes() - - padding = (-len(b)) % 4 - if padding: - b = b + b'\0' * padding - words = 
struct.Struct('!%dI' % (len(b) // 4)).unpack(b) - - chunks = [b'z' if foldnuls and not word else - b'y' if foldspaces and word == 0x20202020 else - (chars2[word // 614125] + - chars2[word // 85 % 7225] + - chars[word % 85]) - for word in words] - - if padding and not pad: - if chunks[-1] == b'z': - chunks[-1] = chars[0] * 5 - chunks[-1] = chunks[-1][:-padding] - - return b''.join(chunks) - def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): """Encode bytes-like object b using Ascii85 and return a bytes object. @@ -337,29 +307,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): adobe controls whether the encoded byte sequence is framed with <~ and ~>, which is used by the Adobe implementation. """ - global _a85chars, _a85chars2 - # Delay the initialization of tables to not waste memory - # if the function is never called - if _a85chars2 is None: - _a85chars = [bytes((i,)) for i in range(33, 118)] - _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] - - result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces) - - if adobe: - result = _A85START + result - if wrapcol: - wrapcol = max(2 if adobe else 1, wrapcol) - chunks = [result[i: i + wrapcol] - for i in range(0, len(result), wrapcol)] - if adobe: - if len(chunks[-1]) + 2 > wrapcol: - chunks.append(b'') - result = b'\n'.join(chunks) - if adobe: - result += _A85END - - return result + return binascii.b2a_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, width=wrapcol, pad=pad) def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): """Decode the Ascii85 encoded bytes-like object or ASCII string b. @@ -378,66 +327,8 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): The result is returned as a bytes object. 
""" b = _bytes_from_decode_data(b) - if adobe: - if not b.endswith(_A85END): - raise ValueError( - "Ascii85 encoded byte sequences must end " - "with {!r}".format(_A85END) - ) - if b.startswith(_A85START): - b = b[2:-2] # Strip off start/end markers - else: - b = b[:-2] - # - # We have to go through this stepwise, so as to ignore spaces and handle - # special short sequences - # - packI = struct.Struct('!I').pack - decoded = [] - decoded_append = decoded.append - curr = [] - curr_append = curr.append - curr_clear = curr.clear - for x in b + b'u' * 4: - if b'!'[0] <= x <= b'u'[0]: - curr_append(x) - if len(curr) == 5: - acc = 0 - for x in curr: - acc = 85 * acc + (x - 33) - try: - decoded_append(packI(acc)) - except struct.error: - raise ValueError('Ascii85 overflow') from None - curr_clear() - elif x == b'z'[0]: - if curr: - raise ValueError('z inside Ascii85 5-tuple') - decoded_append(b'\0\0\0\0') - elif foldspaces and x == b'y'[0]: - if curr: - raise ValueError('y inside Ascii85 5-tuple') - decoded_append(b'\x20\x20\x20\x20') - elif x in ignorechars: - # Skip whitespace - continue - else: - raise ValueError('Non-Ascii85 digit found: %c' % x) - - result = b''.join(decoded) - padding = 4 - len(curr) - if padding: - # Throw away the extra padding - result = result[:-padding] - return result - -# The following code is originally taken (with permission) from Mercurial - -_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" - b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") -_b85chars = None -_b85chars2 = None -_b85dec = None + return binascii.a2b_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, ignore=ignorechars) def b85encode(b, pad=False): """Encode bytes-like object b in base85 format and return a bytes object. @@ -445,69 +336,19 @@ def b85encode(b, pad=False): If pad is true, the input is padded with b'\\0' so its length is a multiple of 4 bytes before encoding. 
""" - global _b85chars, _b85chars2 - # Delay the initialization of tables to not waste memory - # if the function is never called - if _b85chars2 is None: - _b85chars = [bytes((i,)) for i in _b85alphabet] - _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] - return _85encode(b, _b85chars, _b85chars2, pad) + return binascii.b2a_base85(b, pad=pad, newline=False) def b85decode(b): """Decode the base85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. """ - global _b85dec - # Delay the initialization of tables to not waste memory - # if the function is never called - if _b85dec is None: - _b85dec = [None] * 256 - for i, c in enumerate(_b85alphabet): - _b85dec[c] = i - b = _bytes_from_decode_data(b) - padding = (-len(b)) % 5 - b = b + b'~' * padding - out = [] - packI = struct.Struct('!I').pack - for i in range(0, len(b), 5): - chunk = b[i:i + 5] - acc = 0 - try: - for c in chunk: - acc = acc * 85 + _b85dec[c] - except TypeError: - for j, c in enumerate(chunk): - if _b85dec[c] is None: - raise ValueError('bad base85 character at position %d' - % (i + j)) from None - raise - try: - out.append(packI(acc)) - except struct.error: - raise ValueError('base85 overflow in hunk starting at byte %d' - % i) from None - - result = b''.join(out) - if padding: - result = result[:-padding] - return result - -_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' - b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') -# Translating b85 valid but z85 invalid chars to b'\x00' is required -# to prevent them from being decoded as b85 valid chars. 
-_z85_b85_decode_diff = b';_`|~' -_z85_decode_translation = bytes.maketrans( - _z85alphabet + _z85_b85_decode_diff, - _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) -) -_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + return binascii.a2b_base85(b, strict_mode=True) def z85encode(s): """Encode bytes-like object b in z85 format and return a bytes object.""" - return b85encode(s).translate(_z85_encode_translation) + return binascii.b2a_base85(s, newline=False, z85=True) def z85decode(s): """Decode the z85-encoded bytes-like object or ASCII string b @@ -515,11 +356,7 @@ def z85decode(s): The result is returned as a bytes object. """ s = _bytes_from_decode_data(s) - s = s.translate(_z85_decode_translation) - try: - return b85decode(s) - except ValueError as e: - raise ValueError(e.args[0].replace('base85', 'z85')) from None + return binascii.a2b_base85(s, strict_mode=True, z85=True) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. 
It just doesn't seem worth it diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 1f3b6746ce4a62..56a6a6d989133b 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -9,9 +9,11 @@ # Note: "*_hex" functions are aliases for "(un)hexlify" -b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_qp', 'b2a_uu', +b2a_functions = ['b2a_ascii85', 'b2a_base64', 'b2a_base85', + 'b2a_hex', 'b2a_qp', 'b2a_uu', 'hexlify'] -a2b_functions = ['a2b_base64', 'a2b_hex', 'a2b_qp', 'a2b_uu', +a2b_functions = ['a2b_ascii85', 'a2b_base64', 'a2b_base85', + 'a2b_hex', 'a2b_qp', 'a2b_uu', 'unhexlify'] all_functions = a2b_functions + b2a_functions + ['crc32', 'crc_hqx'] @@ -207,6 +209,337 @@ def assertInvalidLength(data): assertInvalidLength(b'a' * (4 * 87 + 1)) assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters + def test_ascii85_valid(self): + # Test Ascii85 with valid data + ASCII85_PREFIX = b"<~" + ASCII85_SUFFIX = b"~>" + + # Interleave blocks of 4 null bytes and 4 spaces into test data + rawdata = bytearray() + rawlines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = b"\0\0\0\0" if k & 1 else b" " + b = b + self.rawdata[i:i + k] + b = b" " if k & 1 else b"\0\0\0\0" + rawdata += b + rawlines.append(b) + i += k + if i >= len(self.rawdata): + break + + # Test core parameter combinations + params = (False, False), (False, True), (True, False), (True, True) + for fold_spaces, wrap in params: + lines = [] + for rawline in rawlines: + b = self.type2test(rawline) + a = binascii.b2a_ascii85(b, fold_spaces=fold_spaces, wrap=wrap) + lines.append(a) + res = bytearray() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_ascii85(a, fold_spaces=fold_spaces, wrap=wrap) + res += b + self.assertEqual(res, rawdata) + + def test_ascii85_invalid(self): + # Test Ascii85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = 
binascii.b2a_ascii85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"!\"#$%&'()*+,-./0123456789:;<=>?@" \ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu" + b"z" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_ascii85(a, ignore=fillers) + res += b + self.assertEqual(res, self.rawdata) + + # Test Ascii85 with only invalid characters + fillers = self.type2test(fillers) + b = binascii.a2b_ascii85(fillers, ignore=fillers) + self.assertEqual(b, b"") + + def test_ascii85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_ascii85(self.type2test(data), **kwargs) + + def assertMissingDelimiter(data): + _assertRegexTemplate(r"(?i)end with '~>'", data, wrap=True) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)85 overflow", data) + + def assertInvalidSpecial(data): + _assertRegexTemplate(r"(?i)'[yz]'.+5-tuple", data, fold_spaces=True) + + def assertInvalidChar(data, **kwargs): + _assertRegexTemplate(r"(?i)invalid in Ascii85", data, **kwargs) + + def assertInvalidLength(data): + _assertRegexTemplate(r"(?i)invalid length", data) + + # Test Ascii85 with missing delimiters + assertMissingDelimiter(b"") + assertMissingDelimiter(b"a") + assertMissingDelimiter(b"<~") + assertMissingDelimiter(b"<~!~") + assertMissingDelimiter(b"<~abc>") + assertMissingDelimiter(b"<~has delimiter but not terminal~> !") + + # Test Ascii85 with out-of-range encoded value + assertOverflow(b"t") + assertOverflow(b"s9") + assertOverflow(b"s8X") + assertOverflow(b"s8W.") + assertOverflow(b's8W-"') + assertOverflow(b"s8W-!u") + 
assertOverflow(b"s8W-!s8W-!zs8X") + + # Test Ascii85 with misplaced short form groups + assertInvalidSpecial(b"ay") + assertInvalidSpecial(b"az") + assertInvalidSpecial(b"aby") + assertInvalidSpecial(b"ayz") + assertInvalidSpecial(b"abcz") + assertInvalidSpecial(b"abcdy") + assertInvalidSpecial(b"y!and!z!then!!y") + + # Test Ascii85 with non-ignored invalid characters + assertInvalidChar(b"j\n") + assertInvalidChar(b" ", ignore=b"") + assertInvalidChar(b" valid\x02until\x03", ignore=b"\x00\x01\x02\x04") + assertInvalidChar(b"\tFCb", ignore=b"\n") + assertInvalidChar(b"xxxB\nP\thU'D v/F+", ignore=b" \n\tv") + + # Test Ascii85 with invalid length of final group (1 mod 5) + assertInvalidLength(b"a") + assertInvalidLength(b"b") + assertInvalidLength(b"zc") + assertInvalidLength(b"zza") + assertInvalidLength(b"!!!!!a") + assertInvalidLength(b"+"), + (b"", 1, b"", b"<~\n~>"), + (b"a", 0, b"@/", b"<~@/~>"), + (b"a", 1, b"@\n/", b"<~\n@/\n~>"), + (b"a", 2, b"@/", b"<~\n@/\n~>"), + (b"a", 3, b"@/", b"<~@\n/~>"), + (b"a", 4, b"@/", b"<~@/\n~>"), + (b"a", 5, b"@/", b"<~@/\n~>"), + (b"a", 6, b"@/", b"<~@/~>"), + (b"a", 7, b"@/", b"<~@/~>"), + (b"a", 123, b"@/", b"<~@/~>"), + (b"this is a test", 7, b"FD,B0+D\nGm>@3BZ\n'F*%", + b"<~FD,B0\n+DGm>@3\nBZ'F*%\n~>"), + (b"a test!!!!!!! 
", 11, b"@3BZ'F*&QK+\nX&!P+WqmM+9", + b"<~@3BZ'F*&Q\nK+X&!P+WqmM\n+9~>"), + (b"\0" * 56, 7, b"zzzzzzz\nzzzzzzz", b"<~zzzzz\nzzzzzzz\nzz~>"), + ] + for b, n, a, a_wrap in tests: + assertEncode(a, b, n) + assertEncode(a_wrap, b, n, wrap=True) + assertDecode(a, b) + assertDecode(a_wrap, b, wrap=True) + + def test_ascii85_pad(self): + # Test Ascii85 with encode padding + rawdata = b"n1n3tee\n ch@rAcTer$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_ascii85(self.type2test(b), pad=True) + b_pad = binascii.a2b_ascii85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + # Test Ascii85 short form groups with encode padding + def assertShortPad(data, expected, **kwargs): + data = self.type2test(data) + res = binascii.b2a_ascii85(data, **kwargs) + self.assertEqual(res, expected) + + assertShortPad(b"\0", b"!!", pad=False) + assertShortPad(b"\0", b"z", pad=True) + assertShortPad(b"\0" * 2, b"z", pad=True) + assertShortPad(b"\0" * 3, b"z", pad=True) + assertShortPad(b"\0" * 4, b"z", pad=True) + assertShortPad(b"\0" * 5, b"zz", pad=True) + assertShortPad(b"\0" * 6, b"z!!!") + assertShortPad(b" " * 7, b"y+", + fold_spaces=True, wrap=True, pad=True) + assertShortPad(b"\0\0\0\0abcd \0\0", b"<~z@:E_Wy\nz~>", + fold_spaces=True, wrap=True, width=9, pad=True) + + def test_ascii85_ignore(self): + # Test Ascii85 with ignored characters + def assertIgnore(data, expected, ignore=b"", **kwargs): + data = self.type2test(data) + ignore = self.type2test(ignore) + with self.assertRaisesRegex(binascii.Error, r"(?i)invalid in Ascii85"): + binascii.a2b_ascii85(data, **kwargs) + res = binascii.a2b_ascii85(data, ignore=ignore, **kwargs) + self.assertEqual(res, expected) + + assertIgnore(b"\n", b"", ignore=b"\n") + assertIgnore(b"<~ ~>", b"", ignore=b" ", wrap=True) + assertIgnore(b"z|z", b"\0" * 8, ignore=b"|||") # repeats don't matter + assertIgnore(b"zz!!|", b"\0" * 9, ignore=b"|!z") # 
ignore only if invalid + assertIgnore(b"<~B P~@~>", b"hi", ignore=b" <~>", wrap=True) + assertIgnore(b"zy}", b"\0\0\0\0", ignore=b"zy}") + assertIgnore(b"zy}", b"\0\0\0\0 ", ignore=b"zy}", fold_spaces=True) + + def test_base85_valid(self): + # Test base85 with valid data + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + res = bytes() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_base85(a) + res += b + self.assertEqual(res, self.rawdata) + + def test_base85_invalid(self): + # Test base85 with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~" + for i in range(256): + if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_base85(a) + res += b + self.assertEqual(res, self.rawdata) + + def test_base85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base85(self.type2test(data), **kwargs) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)base85 overflow", data) + + def assertInvalidLength(data): + _assertRegexTemplate(r"(?i)invalid length", data) + + # Test base85 with out-of-range encoded value + assertOverflow(b"}") + assertOverflow(b"|O") + assertOverflow(b"|Nt") + assertOverflow(b"|NsD") + assertOverflow(b"|NsC1") + 
assertOverflow(b"|NsC0~") + assertOverflow(b"|NsC0|NsC0|NsD0") + + # Test base85 with invalid length of final group (1 mod 5) + assertInvalidLength(b"0") + assertInvalidLength(b"1") + assertInvalidLength(b"^^^^^^") + assertInvalidLength(b"|NsC0|NsC0a") + assertInvalidLength(b"_" * (5 * 43 + 21)) + + def test_base85_pad(self): + # Test base85 with encode padding + rawdata = b"n1n3Tee\n ch@rAc\te\r$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_base85(self.type2test(b), pad=True) + b_pad = binascii.a2b_base85(self.type2test(a_pad)) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + def test_base85_strict_mode(self): + # Test base85 with strict mode on + def assertNonBase85Data(data, expected): + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, r"(?i)invalid in base85"): + binascii.a2b_base85(data, strict_mode=True) + default_res = binascii.a2b_base85(data) + non_strict_res = binascii.a2b_base85(data, strict_mode=False) + self.assertEqual(default_res, non_strict_res) + self.assertEqual(non_strict_res, expected) + + assertNonBase85Data(b"\xda", b"") + assertNonBase85Data(b"00\0\0", b"\0") + assertNonBase85Data(b"Z )*", b"ok") + assertNonBase85Data(b"bY*jNb0Hyq\n", b"tests!!~") + + def test_base85_newline(self): + # Test base85 newline parameter + b = self.type2test(b"t3s\t ") + self.assertEqual(binascii.b2a_base85(b), b"bTe}aAO\n") + self.assertEqual(binascii.b2a_base85(b, newline=True), b"bTe}aAO\n") + self.assertEqual(binascii.b2a_base85(b, newline=False), b"bTe}aAO") + + def test_base85_z85(self): + # Test base85 z85 parameter + b = self.type2test(b"t3s\t ") + a = self.type2test(b"BtE$Aao\n") + self.assertEqual(binascii.b2a_base85(b, z85=True), b"BtE$Aao\n") + self.assertEqual(binascii.a2b_base85(a, z85=True), b"t3s\t ") + def test_uu(self): MAX_UU = 45 for backtick in (True, False): diff --git 
a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst new file mode 100644 index 00000000000000..fef1052b738a80 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-20-21-54-51.gh-issue-101178.4k2Sib.rst @@ -0,0 +1,2 @@ +Add Ascii85 and base85 support to :mod:`binascii` and improve the +performance of the base-85 converters in :mod:`base64`. diff --git a/Modules/binascii.c b/Modules/binascii.c index 6bb01d148b6faa..f22ca6943ad313 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -105,6 +105,84 @@ static const unsigned char table_b2a_base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const unsigned char table_a2b_base85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,62,-1,63, 64,65,66,-1, 67,68,69,70, -1,71,-1,-1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,72, 73,74,75,76, + 77,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, + 25,26,27,28, 29,30,31,32, 33,34,35,-1, -1,-1,78,79, + 80,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50, + 51,52,53,54, 55,56,57,58, 59,60,61,81, 82,83,84,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_a2b_base85_a85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, + 15,16,17,18, 19,20,21,22, 23,24,25,26, 27,28,29,30, + 31,32,33,34, 35,36,37,38, 39,40,41,42, 43,44,45,46, + 
47,48,49,50, 51,52,53,54, 55,56,57,58, 59,60,61,62, + 63,64,65,66, 67,68,69,70, 71,72,73,74, 75,76,77,78, + 79,80,81,82, 83,84,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_a2b_base85_z85[] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,68,-1,84, 83,82,72,-1, 75,76,70,65, -1,63,62,69, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,64,-1, 73,66,74,71, + 81,36,37,38, 39,40,41,42, 43,44,45,46, 47,48,49,50, + 51,52,53,54, 55,56,57,58, 59,60,61,77, -1,78,67,-1, + -1,10,11,12, 13,14,15,16, 17,18,19,20, 21,22,23,24, + 25,26,27,28, 29,30,31,32, 33,34,35,79, -1,80,-1,-1, + + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, +}; + +static const unsigned char table_b2a_base85[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ + "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"; + +static const unsigned char table_b2a_base85_a85[] = + "!\"#$%&\'()*+,-./0123456789:;<=>?@" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstu"; + +static const unsigned char table_b2a_base85_z85[] = + "0123456789abcdefghijklmnopqrstuvwxyz" \ + "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/\x2a?&<>()[]{}@%$#"; /* clinic 
doesn't like '/' followed by '*' */ + +#define BASE85_A85_PREFIX '<' +#define BASE85_A85_AFFIX '~' +#define BASE85_A85_SUFFIX '>' +#define BASE85_A85_Z 0x00000000 +#define BASE85_A85_Y 0x20202020 + static const unsigned short crctab_hqx[256] = { 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, @@ -587,6 +665,465 @@ binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline) return _PyBytesWriter_Finish(&writer, ascii_data); } +/*[clinic input] +binascii.a2b_ascii85 + + data: ascii_buffer + / + * + fold_spaces: bool = False + Allow 'y' as a short form encoding four spaces. + wrap: bool = False + Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. + ignore: Py_buffer(c_default="NULL", py_default="b''") = None + An optional bytes-like object with input characters to be ignored. + +Decode Ascii85 data. +[clinic start generated code]*/ + +static PyObject * +binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, Py_buffer *ignore) +/*[clinic end generated code: output=6ab30f2a26d301a1 input=11c60c016d4f334b]*/ +{ + const unsigned char *ascii_data, *ignore_data; + unsigned char *bin_data; + int group_pos = 0; + unsigned char this_ch, this_digit; + unsigned char ignore_map[256] = {0}; + uint32_t leftchar = 0; + Py_ssize_t ascii_len, bin_len, chunk_len, ignore_len; + _PyBytesWriter writer; + binascii_state *state; + + ascii_data = data->buf; + ascii_len = data->len; + + assert(ascii_len >= 0); + + /* Consume Ascii85 prefix and suffix if present. 
*/ + if (wrap) { + if (ascii_len < 2 || + ascii_data[ascii_len - 2] != BASE85_A85_AFFIX || + ascii_data[ascii_len - 1] != BASE85_A85_SUFFIX) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Expected Ascii85 data to end with '~>'"); + } + return NULL; + } + ascii_len -= 2; + if (ascii_len >= 2 && + ascii_data[0] == BASE85_A85_PREFIX && + ascii_data[1] == BASE85_A85_AFFIX) { + ascii_data += 2; + ascii_len -= 2; + } + } + + /* Allocate output buffer. */ + bin_len = ascii_len; + for (Py_ssize_t i = 0; i < ascii_len; i++) { + this_ch = ascii_data[i]; + if (this_ch == 'y' || this_ch == 'z') { + bin_len += 4; + } + } + bin_len = 4 * ((bin_len + 4) / 5); + + _PyBytesWriter_Init(&writer); + bin_data = _PyBytesWriter_Alloc(&writer, bin_len); + if (bin_data == NULL) { + return NULL; + } + + /* Build ignore map. */ + if (ignore->obj != NULL) { + ignore_data = ignore->buf; + ignore_len = ignore->len; + for (Py_ssize_t i = 0; i < ignore_len; i++) { + this_ch = ignore_data[i]; + ignore_map[this_ch] = -1; + } + } + + for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { + /* Shift (in radix-85) data or padding into our buffer. */ + if (ascii_len > 0) { + this_ch = *ascii_data; + this_digit = table_a2b_base85_a85[this_ch]; + } else { + /* Pad with largest radix-85 digit when decoding. */ + this_digit = 84; + } + if (this_digit < 85) { + if (leftchar > UINT32_MAX / 85 || + (leftchar *= 85) > UINT32_MAX - this_digit) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, "Ascii85 overflow"); + } + goto error_end; + } + leftchar += this_digit; + group_pos++; + } else if ((this_ch == 'y' && fold_spaces) || this_ch == 'z') { + if (group_pos != 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + "'%c' inside Ascii85 5-tuple", this_ch); + } + goto error_end; + } + leftchar = this_ch == 'y' ? 
BASE85_A85_Y : BASE85_A85_Z; + group_pos = 5; + } else if (!ignore_map[this_ch]) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, "'%c' invalid in Ascii85", this_ch); + } + goto error_end; + } + + /* Wait until buffer is full. */ + if (group_pos != 5) { + continue; + } + + /* Treat encoded length of 1 mod 5 as an error. */ + if (ascii_len == -3) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Ascii85 data has invalid length"); + } + goto error_end; + } + + /* Write current chunk. */ + chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; + for (Py_ssize_t i = 0; i < chunk_len; i++) { + *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; + } + + group_pos = 0; + leftchar = 0; + } + + return _PyBytesWriter_Finish(&writer, bin_data); + +error_end: + _PyBytesWriter_Dealloc(&writer); + return NULL; +} + +/*[clinic input] +binascii.b2a_ascii85 + + data: Py_buffer + / + * + fold_spaces: bool = False + Emit 'y' as a short form encoding four spaces. + wrap: bool = False + Wrap result in '<~' and '~>' as in Adobe Ascii85. + width: unsigned_int(bitwise=True) = 0 + Split result into lines of provided width. + pad: bool = False + Pad input to a multiple of 4 before encoding. + +Ascii85-encode data. +[clinic start generated code]*/ + +static PyObject * +binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, unsigned int width, int pad) +/*[clinic end generated code: output=78426392ad3fc75b input=d5122dbab4dbb9f2]*/ +{ + unsigned char *ascii_data; + const unsigned char *bin_data; + int chunk_pos = 0; + unsigned char this_group[5]; + uint32_t leftchar = 0; + unsigned int line_len = 0; + Py_ssize_t bin_len, group_len, out_len; + _PyBytesWriter writer; + + bin_data = data->buf; + bin_len = data->len; + + assert(bin_len >= 0); + + if (wrap && width == 1) { + width = 2; + } + + /* Allocate output buffer. + XXX: Do a pre-pass above some threshold estimate (cf. 'yz')? 
+ */ + out_len = 5 * ((bin_len + 3) / 4); + if (wrap) out_len += 4; + if (!pad && (bin_len % 4)) out_len -= 4 - (bin_len % 4); + if (width && out_len) out_len += (out_len - 1) / width; + + _PyBytesWriter_Init(&writer); + ascii_data = _PyBytesWriter_Alloc(&writer, out_len); + if (ascii_data == NULL) { + return NULL; + } + + if (wrap) { + *ascii_data++ = BASE85_A85_PREFIX; + *ascii_data++ = BASE85_A85_AFFIX; + line_len = 2; + } + + for (; bin_len > 0 || chunk_pos != 0; bin_len--, bin_data++) { + /* Shift data or padding into our buffer. */ + leftchar <<= 8; /* Pad with zero when encoding. */ + if (bin_len > 0) { + leftchar |= *bin_data; + } + + /* Wait until buffer is full. */ + if (++chunk_pos != 4) { + continue; + } + + /* Encode current chunk. */ + if (((bin_len > 0 || pad) && leftchar == BASE85_A85_Z) || + (fold_spaces && leftchar == BASE85_A85_Y)) { + this_group[0] = leftchar == BASE85_A85_Y ? 'y' : 'z'; + group_len = 1; + leftchar = 0; + } else { + group_len = bin_len > 0 || pad ? 5 : 4 + bin_len; + for (Py_ssize_t i = 4; i >= 0; i--) { + this_group[i] = table_b2a_base85_a85[leftchar % 85]; + leftchar /= 85; + } + } + + /* Write current group. */ + for (Py_ssize_t i = 0; i < group_len; i++) { + if (width && line_len == width) { + *ascii_data++ = '\n'; + line_len = 0; + } + *ascii_data++ = this_group[i]; + line_len++; + } + + chunk_pos = 0; + } + + if (wrap) { + if (width && line_len + 2 > width) { + *ascii_data++ = '\n'; + } + *ascii_data++ = BASE85_A85_AFFIX; + *ascii_data++ = BASE85_A85_SUFFIX; + } + + return _PyBytesWriter_Finish(&writer, ascii_data); +} + +/*[clinic input] +binascii.a2b_base85 + + data: ascii_buffer + / + * + strict_mode: bool = False + When set to True, bytes that are not in the base85 alphabet + (or the Z85 alphabet, if z85 is True) are not allowed. + z85: bool = False + When set to True, the Z85 alphabet is used instead of the standard + base85 alphabet. + +Decode a line of base85 data. 
+[clinic start generated code]*/ + +static PyObject * +binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, + int z85) +/*[clinic end generated code: output=c5b9118ffe77f1cb input=65c2a532ad64ebd5]*/ +{ + const unsigned char *ascii_data, *table_a2b; + unsigned char *bin_data; + int group_pos = 0; + unsigned char this_ch, this_digit; + uint32_t leftchar = 0; + Py_ssize_t ascii_len, bin_len, chunk_len; + _PyBytesWriter writer; + binascii_state *state; + + table_a2b = z85 ? table_a2b_base85_z85 : table_a2b_base85; + ascii_data = data->buf; + ascii_len = data->len; + + assert(ascii_len >= 0); + + /* Allocate output buffer. */ + bin_len = 4 * ((ascii_len + 4) / 5); + + _PyBytesWriter_Init(&writer); + bin_data = _PyBytesWriter_Alloc(&writer, bin_len); + if (bin_data == NULL) { + return NULL; + } + + for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { + /* Shift (in radix-85) data or padding into our buffer. */ + if (ascii_len > 0) { + this_ch = *ascii_data; + this_digit = table_a2b[this_ch]; + } else { + /* Pad with largest radix-85 digit when decoding. */ + this_digit = 84; + } + if (this_digit < 85) { + if (leftchar > UINT32_MAX / 85 || + (leftchar *= 85) > UINT32_MAX - this_digit) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + z85 ? "z85 overflow" : "base85 overflow"); + } + goto error_end; + } + leftchar += this_digit; + group_pos++; + } else if (strict_mode) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, "'%c' %s", this_ch, + z85 ? "invalid in z85" : "invalid in base85"); + } + goto error_end; + } + + /* Wait until buffer is full. */ + if (group_pos != 5) { + continue; + } + + /* Treat encoded length of 1 mod 5 as an error. */ + if (ascii_len == -3) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_Format(state->Error, + z85 ? 
"z85 data has invalid length" + : "base85 data has invalid length"); + } + goto error_end; + } + + /* Write current chunk. */ + chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; + for (Py_ssize_t i = 0; i < chunk_len; i++) { + *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; + } + + group_pos = 0; + leftchar = 0; + } + + return _PyBytesWriter_Finish(&writer, bin_data); + +error_end: + _PyBytesWriter_Dealloc(&writer); + return NULL; +} + +/*[clinic input] +binascii.b2a_base85 + + data: Py_buffer + / + * + pad: bool = False + Pad input to a multiple of 4 before encoding. + newline: bool = True + Append a newline to the result. + z85: bool = False + Use Z85 alphabet instead of standard base85 alphabet. + +Base85-code line of data. +[clinic start generated code]*/ + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, + int newline, int z85) +/*[clinic end generated code: output=d3740e9a20c8e071 input=e4e07591f7a11ae4]*/ +{ + unsigned char *ascii_data; + const unsigned char *bin_data, *table_b2a; + uint32_t leftchar = 0; + Py_ssize_t bin_len, group_len, out_len; + _PyBytesWriter writer; + + table_b2a = z85 ? table_b2a_base85_z85 : table_b2a_base85; + bin_data = data->buf; + bin_len = data->len; + + assert(bin_len >= 0); + + /* Allocate output buffer. */ + out_len = 5 * ((bin_len + 3) / 4); + if (!pad && (bin_len % 4)) out_len -= 4 - (bin_len % 4); + if (newline) out_len++; + + _PyBytesWriter_Init(&writer); + ascii_data = _PyBytesWriter_Alloc(&writer, out_len); + if (ascii_data == NULL) { + return NULL; + } + + /* Encode all full-length chunks. 
*/ + for (; bin_len >= 4; bin_len -= 4, bin_data += 4) { + leftchar = (bin_data[0] << 24) | (bin_data[1] << 16) | + (bin_data[2] << 8) | bin_data[3]; + + ascii_data[4] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[3] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[2] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[1] = table_b2a[leftchar % 85]; + leftchar /= 85; + ascii_data[0] = table_b2a[leftchar]; + + ascii_data += 5; + } + + /* Encode partial-length final chunk. */ + if (bin_len > 0) { + for (Py_ssize_t i = 0; i < 4; i++) { + leftchar <<= 8; /* Pad with zero when encoding. */ + if (i < bin_len) { + leftchar |= *bin_data++; + } + } + group_len = pad ? 5 : bin_len + 1; + for (Py_ssize_t i = 4; i >= 0; i--) { + if (i < group_len) { + ascii_data[i] = table_b2a[leftchar % 85]; + } + leftchar /= 85; + } + ascii_data += group_len; + } + + if (newline) { + *ascii_data++ = '\n'; + } + + return _PyBytesWriter_Finish(&writer, ascii_data); +} /*[clinic input] binascii.crc_hqx @@ -1246,6 +1783,10 @@ static struct PyMethodDef binascii_module_methods[] = { BINASCII_B2A_UU_METHODDEF BINASCII_A2B_BASE64_METHODDEF BINASCII_B2A_BASE64_METHODDEF + BINASCII_B2A_ASCII85_METHODDEF + BINASCII_A2B_ASCII85_METHODDEF + BINASCII_A2B_BASE85_METHODDEF + BINASCII_B2A_BASE85_METHODDEF BINASCII_A2B_HEX_METHODDEF BINASCII_B2A_HEX_METHODDEF BINASCII_HEXLIFY_METHODDEF diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 602e42a4c1aaa4..c119c07bb6f22c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -267,6 +267,423 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P return return_value; } +PyDoc_STRVAR(binascii_a2b_ascii85__doc__, +"a2b_ascii85($module, data, /, *, fold_spaces=False, wrap=False,\n" +" ignore=b\'\')\n" +"--\n" +"\n" +"Decode Ascii85 data.\n" +"\n" +" fold_spaces\n" +" Allow \'y\' as a short form encoding four spaces.\n" +" wrap\n" +" Expect data to be wrapped in 
\'<~\' and \'~>\' as in Adobe Ascii85.\n" +" ignore\n" +" An optional bytes-like object with input characters to be ignored."); + +#define BINASCII_A2B_ASCII85_METHODDEF \ + {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, + +static PyObject * +binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, Py_buffer *ignore); + +static PyObject * +binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(fold_spaces), &_Py_ID(wrap), &_Py_ID(ignore), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "fold_spaces", "wrap", "ignore", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_ascii85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int fold_spaces = 0; + int wrap = 0; + Py_buffer ignore = {NULL, NULL}; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + fold_spaces = PyObject_IsTrue(args[1]); + if (fold_spaces < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + wrap = PyObject_IsTrue(args[2]); + if (wrap < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (PyObject_GetBuffer(args[3], &ignore, PyBUF_SIMPLE) != 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_ascii85_impl(module, &data, fold_spaces, wrap, &ignore); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + /* Cleanup for ignore */ + if (ignore.obj) { + PyBuffer_Release(&ignore); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_ascii85__doc__, +"b2a_ascii85($module, data, /, *, fold_spaces=False, wrap=False,\n" +" width=0, pad=False)\n" +"--\n" +"\n" +"Ascii85-encode data.\n" +"\n" +" fold_spaces\n" +" Emit \'y\' as a short form encoding four spaces.\n" +" wrap\n" +" Wrap result in \'<~\' and \'~>\' as in Adobe Ascii85.\n" +" width\n" +" Split result into lines of provided width.\n" +" pad\n" +" Pad input to a multiple of 4 before encoding."); + +#define BINASCII_B2A_ASCII85_METHODDEF \ + {"b2a_ascii85", _PyCFunction_CAST(binascii_b2a_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_ascii85__doc__}, + +static PyObject * +binascii_b2a_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, + int wrap, unsigned int width, int pad); + +static PyObject * +binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + 
PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(fold_spaces), &_Py_ID(wrap), &_Py_ID(width), &_Py_ID(pad), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "fold_spaces", "wrap", "width", "pad", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_ascii85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[5]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int fold_spaces = 0; + int wrap = 0; + unsigned int width = 0; + int pad = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + fold_spaces = PyObject_IsTrue(args[1]); + if (fold_spaces < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + wrap = PyObject_IsTrue(args[2]); + if (wrap < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[3]) { + width = (unsigned int)PyLong_AsUnsignedLongMask(args[3]); + if (width == (unsigned int)-1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + pad = PyObject_IsTrue(args[4]); + if (pad < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_ascii85_impl(module, &data, fold_spaces, 
wrap, width, pad); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + +PyDoc_STRVAR(binascii_a2b_base85__doc__, +"a2b_base85($module, data, /, *, strict_mode=False, z85=False)\n" +"--\n" +"\n" +"Decode a line of base85 data.\n" +"\n" +" strict_mode\n" +" When set to True, bytes that are not in the base85 alphabet\n" +" (or the Z85 alphabet, if z85 is True) are not allowed.\n" +" z85\n" +" When set to True, the Z85 alphabet is used instead of the standard\n" +" base85 alphabet."); + +#define BINASCII_A2B_BASE85_METHODDEF \ + {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, + +static PyObject * +binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, + int z85); + +static PyObject * +binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(z85), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "strict_mode", "z85", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "a2b_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[3]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int strict_mode = 0; + int z85 = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!ascii_buffer_converter(args[0], &data)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + strict_mode = PyObject_IsTrue(args[1]); + if (strict_mode < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + z85 = PyObject_IsTrue(args[2]); + if (z85 < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_a2b_base85_impl(module, &data, strict_mode, z85); + +exit: + /* Cleanup for data */ + if (data.obj) + PyBuffer_Release(&data); + + return return_value; +} + +PyDoc_STRVAR(binascii_b2a_base85__doc__, +"b2a_base85($module, data, /, *, pad=False, newline=True, z85=False)\n" +"--\n" +"\n" +"Base85-code line of data.\n" +"\n" +" pad\n" +" Pad input to a multiple of 4 before encoding.\n" +" newline\n" +" Append a newline to the result.\n" +" z85\n" +" Use Z85 alphabet instead of standard base85 alphabet."); + +#define BINASCII_B2A_BASE85_METHODDEF \ + {"b2a_base85", _PyCFunction_CAST(binascii_b2a_base85), METH_FASTCALL|METH_KEYWORDS, binascii_b2a_base85__doc__}, + +static PyObject * +binascii_b2a_base85_impl(PyObject *module, Py_buffer *data, int pad, + int newline, int z85); + +static PyObject * +binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 3 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pad), &_Py_ID(newline), &_Py_ID(z85), }, + }; + #undef 
NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"", "pad", "newline", "z85", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "b2a_base85", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + Py_buffer data = {NULL, NULL}; + int pad = 0; + int newline = 1; + int z85 = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (PyObject_GetBuffer(args[0], &data, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (args[1]) { + pad = PyObject_IsTrue(args[1]); + if (pad < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (args[2]) { + newline = PyObject_IsTrue(args[2]); + if (newline < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + z85 = PyObject_IsTrue(args[3]); + if (z85 < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = binascii_b2a_base85_impl(module, &data, pad, newline, z85); + +exit: + /* Cleanup for data */ + if (data.obj) { + PyBuffer_Release(&data); + } + + return return_value; +} + PyDoc_STRVAR(binascii_crc_hqx__doc__, "crc_hqx($module, data, crc, /)\n" "--\n" @@ -788,4 +1205,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=adb855a2797c3cad input=a9049054013a1b77]*/ +/*[clinic end generated code: output=95db68a6c51e7370 input=a9049054013a1b77]*/ From aa06c5dcfcab69e5e6f618d346b9531145e5c2c8 Mon Sep 17 00:00:00 2001 From: James Seo Date: Sat, 26 Apr 2025 06:37:59 -0700 Subject: [PATCH 02/12] Restore base64.py --- Lib/base64.py | 179 
+++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 171 insertions(+), 8 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 20542eeb447c55..5d78cc09f40cd3 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -290,6 +290,36 @@ def b16decode(s, casefold=False): # # Ascii85 encoding/decoding # + +_a85chars = None +_a85chars2 = None +_A85START = b"<~" +_A85END = b"~>" + +def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False): + # Helper function for a85encode and b85encode + if not isinstance(b, bytes_types): + b = memoryview(b).tobytes() + + padding = (-len(b)) % 4 + if padding: + b = b + b'\0' * padding + words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b) + + chunks = [b'z' if foldnuls and not word else + b'y' if foldspaces and word == 0x20202020 else + (chars2[word // 614125] + + chars2[word // 85 % 7225] + + chars[word % 85]) + for word in words] + + if padding and not pad: + if chunks[-1] == b'z': + chunks[-1] = chars[0] * 5 + chunks[-1] = chunks[-1][:-padding] + + return b''.join(chunks) + def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): """Encode bytes-like object b using Ascii85 and return a bytes object. @@ -307,8 +337,29 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): adobe controls whether the encoded byte sequence is framed with <~ and ~>, which is used by the Adobe implementation. 
""" - return binascii.b2a_ascii85(b, fold_spaces=foldspaces, - wrap=adobe, width=wrapcol, pad=pad) + global _a85chars, _a85chars2 + # Delay the initialization of tables to not waste memory + # if the function is never called + if _a85chars2 is None: + _a85chars = [bytes((i,)) for i in range(33, 118)] + _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars] + + result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces) + + if adobe: + result = _A85START + result + if wrapcol: + wrapcol = max(2 if adobe else 1, wrapcol) + chunks = [result[i: i + wrapcol] + for i in range(0, len(result), wrapcol)] + if adobe: + if len(chunks[-1]) + 2 > wrapcol: + chunks.append(b'') + result = b'\n'.join(chunks) + if adobe: + result += _A85END + + return result def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): """Decode the Ascii85 encoded bytes-like object or ASCII string b. @@ -327,8 +378,66 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): The result is returned as a bytes object. 
""" b = _bytes_from_decode_data(b) - return binascii.a2b_ascii85(b, fold_spaces=foldspaces, - wrap=adobe, ignore=ignorechars) + if adobe: + if not b.endswith(_A85END): + raise ValueError( + "Ascii85 encoded byte sequences must end " + "with {!r}".format(_A85END) + ) + if b.startswith(_A85START): + b = b[2:-2] # Strip off start/end markers + else: + b = b[:-2] + # + # We have to go through this stepwise, so as to ignore spaces and handle + # special short sequences + # + packI = struct.Struct('!I').pack + decoded = [] + decoded_append = decoded.append + curr = [] + curr_append = curr.append + curr_clear = curr.clear + for x in b + b'u' * 4: + if b'!'[0] <= x <= b'u'[0]: + curr_append(x) + if len(curr) == 5: + acc = 0 + for x in curr: + acc = 85 * acc + (x - 33) + try: + decoded_append(packI(acc)) + except struct.error: + raise ValueError('Ascii85 overflow') from None + curr_clear() + elif x == b'z'[0]: + if curr: + raise ValueError('z inside Ascii85 5-tuple') + decoded_append(b'\0\0\0\0') + elif foldspaces and x == b'y'[0]: + if curr: + raise ValueError('y inside Ascii85 5-tuple') + decoded_append(b'\x20\x20\x20\x20') + elif x in ignorechars: + # Skip whitespace + continue + else: + raise ValueError('Non-Ascii85 digit found: %c' % x) + + result = b''.join(decoded) + padding = 4 - len(curr) + if padding: + # Throw away the extra padding + result = result[:-padding] + return result + +# The following code is originally taken (with permission) from Mercurial + +_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" + b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~") +_b85chars = None +_b85chars2 = None +_b85dec = None def b85encode(b, pad=False): """Encode bytes-like object b in base85 format and return a bytes object. @@ -336,19 +445,69 @@ def b85encode(b, pad=False): If pad is true, the input is padded with b'\\0' so its length is a multiple of 4 bytes before encoding. 
""" - return binascii.b2a_base85(b, pad=pad, newline=False) + global _b85chars, _b85chars2 + # Delay the initialization of tables to not waste memory + # if the function is never called + if _b85chars2 is None: + _b85chars = [bytes((i,)) for i in _b85alphabet] + _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars] + return _85encode(b, _b85chars, _b85chars2, pad) def b85decode(b): """Decode the base85-encoded bytes-like object or ASCII string b The result is returned as a bytes object. """ + global _b85dec + # Delay the initialization of tables to not waste memory + # if the function is never called + if _b85dec is None: + _b85dec = [None] * 256 + for i, c in enumerate(_b85alphabet): + _b85dec[c] = i + b = _bytes_from_decode_data(b) - return binascii.a2b_base85(b, strict_mode=True) + padding = (-len(b)) % 5 + b = b + b'~' * padding + out = [] + packI = struct.Struct('!I').pack + for i in range(0, len(b), 5): + chunk = b[i:i + 5] + acc = 0 + try: + for c in chunk: + acc = acc * 85 + _b85dec[c] + except TypeError: + for j, c in enumerate(chunk): + if _b85dec[c] is None: + raise ValueError('bad base85 character at position %d' + % (i + j)) from None + raise + try: + out.append(packI(acc)) + except struct.error: + raise ValueError('base85 overflow in hunk starting at byte %d' + % i) from None + + result = b''.join(out) + if padding: + result = result[:-padding] + return result + +_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') +# Translating b85 valid but z85 invalid chars to b'\x00' is required +# to prevent them from being decoded as b85 valid chars. 
+_z85_b85_decode_diff = b';_`|~' +_z85_decode_translation = bytes.maketrans( + _z85alphabet + _z85_b85_decode_diff, + _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) +) +_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) def z85encode(s): """Encode bytes-like object b in z85 format and return a bytes object.""" - return binascii.b2a_base85(s, newline=False, z85=True) + return b85encode(s).translate(_z85_encode_translation) def z85decode(s): """Decode the z85-encoded bytes-like object or ASCII string b @@ -356,7 +515,11 @@ def z85decode(s): The result is returned as a bytes object. """ s = _bytes_from_decode_data(s) - return binascii.a2b_base85(s, strict_mode=True, z85=True) + s = s.translate(_z85_decode_translation) + try: + return b85decode(s) + except ValueError as e: + raise ValueError(e.args[0].replace('base85', 'z85')) from None # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it From 63774406fb7dd6a5bf6a39cd55596c748840e517 Mon Sep 17 00:00:00 2001 From: James Seo Date: Sat, 26 Apr 2025 06:38:48 -0700 Subject: [PATCH 03/12] Create _base64 module with wrappers for accelerated functions If we were strictly following PEP-0399, _base64 would be a C module for accelerated functions in base64. Due to historical reasons, those should actually go in binascii instead. We still want to preserve the existing Python code in base64. Parting out facilities for accessing the C functions into a module named _base64 shouldn't risk a naming conflict and will simplify testing. 
--- Lib/_base64.py | 33 +++++++++++++++++++++++++++++++++ Lib/base64.py | 7 +++++++ 2 files changed, 40 insertions(+) create mode 100644 Lib/_base64.py diff --git a/Lib/_base64.py b/Lib/_base64.py new file mode 100644 index 00000000000000..ad21c4f5d87c7d --- /dev/null +++ b/Lib/_base64.py @@ -0,0 +1,33 @@ +"""C accelerator wrappers for originally pure-Python parts of base64.""" + +from binascii import a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85 + +__all__ = ['a85encode', 'a85decode', + 'b85encode', 'b85decode', + 'z85encode', 'z85decode'] + + +def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): + return b2a_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, width=wrapcol, pad=pad) + + +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): + return a2b_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, ignore=ignorechars) + + +def b85encode(b, pad=False): + return b2a_base85(b, pad=pad, newline=False) + + +def b85decode(b): + return a2b_base85(b, strict_mode=True) + + +def z85encode(s): + return b2a_base85(s, newline=False, z85=True) + + +def z85decode(s): + return a2b_base85(s, strict_mode=True, z85=True) diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40cd3..9f99c73bf6ae1f 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -576,6 +576,13 @@ def decodebytes(s): return binascii.a2b_base64(s) +# Use accelerated implementations of originally pure-Python parts if possible. +try: + from _base64 import * +except ImportError: + pass + + # Usable as a script... def main(): """Small main program""" From 6c0e4a3089a90a21e35e48731ca5a976acf4095a Mon Sep 17 00:00:00 2001 From: James Seo Date: Sat, 26 Apr 2025 06:57:57 -0700 Subject: [PATCH 04/12] Test both Python and C codepaths in base64 This is done differently to PEP-0399 to minimize the number of changed lines. 
--- Lib/test/test_base64.py | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 409c8c109e885f..0f7cf82a858b78 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -1,10 +1,37 @@ import unittest -import base64 import binascii import os from array import array +from functools import update_wrapper from test.support import os_helper from test.support import script_helper +from test.support.import_helper import import_fresh_module + +base64 = import_fresh_module("base64", blocked=["_base64"]) +c_base64 = import_fresh_module("base64", fresh=["_base64"]) + + +def with_c_implementation(test_func): + if c_base64 is None: + return test_func + + def _test_func(self): + global base64 + + # Test Python implementation + test_func(self) + + # Test C implementation + base64_ = base64 + try: + base64 = c_base64 + test_func(self) + finally: + base64 = base64_ + + update_wrapper(_test_func, test_func) + + return _test_func class LegacyBase64TestCase(unittest.TestCase): @@ -461,6 +488,7 @@ def test_b16decode(self): # Incorrect "padding" self.assertRaises(binascii.Error, base64.b16decode, '010') + @with_c_implementation def test_a85encode(self): eq = self.assertEqual @@ -511,6 +539,7 @@ def test_a85encode(self): eq(base64.a85encode(b' '*6, foldspaces=True, adobe=False), b'y+', b"www.python.org") + @with_c_implementation def test_b85decode(self): eq = self.assertEqual @@ -660,6 +692,7 @@ def test_b85decode(self): self.check_other_types(base64.b85decode, b'cXxL#aCvlSZ*DGca%T', b"www.python.org") + @with_c_implementation def test_z85decode(self): eq = self.assertEqual @@ -695,6 +728,7 @@ def test_z85decode(self): self.check_other_types(base64.z85decode, b'CxXl-AcVLsz/dgCA+t', b'www.python.org') + @with_c_implementation def test_a85_padding(self): eq = self.assertEqual @@ -710,6 +744,7 @@ def test_a85_padding(self): eq(base64.a85decode(b'G^+IX'), b"xxxx") 
eq(base64.a85decode(b'G^+IXGQ7^D'), b"xxxxx\x00\x00\x00") + @with_c_implementation def test_b85_padding(self): eq = self.assertEqual @@ -725,6 +760,7 @@ def test_b85_padding(self): eq(base64.b85decode(b'czAet'), b"xxxx") eq(base64.b85decode(b'czAetcmMzZ'), b"xxxxx\x00\x00\x00") + @with_c_implementation def test_a85decode_errors(self): illegal = (set(range(32)) | set(range(118, 256))) - set(b' \t\n\r\v') for c in illegal: @@ -762,6 +798,7 @@ def test_a85decode_errors(self): self.assertRaises(ValueError, base64.a85decode, b'aaaay', foldspaces=True) + @with_c_implementation def test_b85decode_errors(self): illegal = list(range(33)) + \ list(b'"\',./:[\\]') + \ @@ -776,6 +813,7 @@ def test_b85decode_errors(self): self.assertRaises(ValueError, base64.b85decode, b'|NsC') self.assertRaises(ValueError, base64.b85decode, b'|NsC1') + @with_c_implementation def test_z85decode_errors(self): illegal = list(range(33)) + \ list(b'"\',;_`|\\~') + \ From ce4773c34cd398416becc5cd550a37d3d7bb62dc Mon Sep 17 00:00:00 2001 From: James Seo Date: Sat, 26 Apr 2025 08:54:30 -0700 Subject: [PATCH 05/12] Match behavior between Python and C base 85 functions As we're now keeping the existing Python base 85 functions, the C implementations should behave exactly the same, down to exception type and wording. It is also no longer an error to try to decode data of length 1 mod 5. 
--- Lib/_base64.py | 44 +++++++++++++++++++++++++-------- Lib/base64.py | 16 +++++++++++- Lib/test/test_binascii.py | 52 +++++++++++++++++++-------------------- Modules/binascii.c | 36 +++++++-------------------- 4 files changed, 83 insertions(+), 65 deletions(-) diff --git a/Lib/_base64.py b/Lib/_base64.py index ad21c4f5d87c7d..d1130ee421ab77 100644 --- a/Lib/_base64.py +++ b/Lib/_base64.py @@ -1,33 +1,57 @@ """C accelerator wrappers for originally pure-Python parts of base64.""" -from binascii import a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85 +from binascii import Error, a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85 +from base64 import _bytes_from_decode_data, bytes_types -__all__ = ['a85encode', 'a85decode', - 'b85encode', 'b85decode', - 'z85encode', 'z85decode'] +# Base 85 encoder functions in base64 silently convert input to bytes. +def _bytes_from_encode_data(b): + return b if isinstance(b, bytes_types) else memoryview(b).tobytes() -def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): + +# Functions in binascii raise binascii.Error instead of ValueError. 
+def raise_valueerror(func): + def _func(*args, **kwargs): + try: + return func(*args, **kwargs) + except Error as e: + raise ValueError(e) from None + return _func + + +@raise_valueerror +def _a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): + b = _bytes_from_encode_data(b) return b2a_ascii85(b, fold_spaces=foldspaces, wrap=adobe, width=wrapcol, pad=pad) -def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): +@raise_valueerror +def _a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): + b = _bytes_from_decode_data(b) return a2b_ascii85(b, fold_spaces=foldspaces, wrap=adobe, ignore=ignorechars) -def b85encode(b, pad=False): +@raise_valueerror +def _b85encode(b, pad=False): + b = _bytes_from_encode_data(b) return b2a_base85(b, pad=pad, newline=False) -def b85decode(b): +@raise_valueerror +def _b85decode(b): + b = _bytes_from_decode_data(b) return a2b_base85(b, strict_mode=True) -def z85encode(s): +@raise_valueerror +def _z85encode(s): + s = _bytes_from_encode_data(s) return b2a_base85(s, newline=False, z85=True) -def z85decode(s): +@raise_valueerror +def _z85decode(s): + s = _bytes_from_decode_data(s) return a2b_base85(s, strict_mode=True, z85=True) diff --git a/Lib/base64.py b/Lib/base64.py index 9f99c73bf6ae1f..83990b7053ce29 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -578,7 +578,21 @@ def decodebytes(s): # Use accelerated implementations of originally pure-Python parts if possible. 
try: - from _base64 import * + from _base64 import (_a85encode, _a85decode, _b85encode, + _b85decode, _z85encode, _z85decode) + from functools import update_wrapper + update_wrapper(_a85encode, a85encode) + update_wrapper(_a85decode, a85decode) + update_wrapper(_b85encode, b85encode) + update_wrapper(_b85decode, b85decode) + update_wrapper(_z85encode, z85encode) + update_wrapper(_z85decode, z85decode) + a85encode = _a85encode + a85decode = _a85decode + b85encode = _b85encode + b85decode = _b85decode + z85encode = _z85encode + z85decode = _z85decode except ImportError: pass diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 56a6a6d989133b..c9f6c1eff01690 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -242,6 +242,21 @@ def test_ascii85_valid(self): res += b self.assertEqual(res, rawdata) + # Test decoding inputs with length 1 mod 5 + params = [ + (b"a", False, False, b"", b""), + (b"xbw", False, False, b"wx", b""), + (b"<~c~>", False, True, b"", b""), + (b"{d ~>", False, True, b" {", b""), + (b"ye", True, False, b"", b" "), + (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), + (b"<~FCfN8yg~>", True, True, b"", b"test "), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + ] + for a, fold_spaces, wrap, ignore, b in params: + kwargs = {"fold_spaces": fold_spaces, "wrap": wrap, "ignore": ignore} + self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + def test_ascii85_invalid(self): # Test Ascii85 with invalid characters interleaved lines, i = [], 0 @@ -284,19 +299,16 @@ def _assertRegexTemplate(assert_regex, data, **kwargs): binascii.a2b_ascii85(self.type2test(data), **kwargs) def assertMissingDelimiter(data): - _assertRegexTemplate(r"(?i)end with '~>'", data, wrap=True) + _assertRegexTemplate(r"(?i)end with b'~>'", data, wrap=True) def assertOverflow(data): - _assertRegexTemplate(r"(?i)85 overflow", data) + _assertRegexTemplate(r"(?i)Ascii85 
overflow", data) def assertInvalidSpecial(data): _assertRegexTemplate(r"(?i)'[yz]'.+5-tuple", data, fold_spaces=True) def assertInvalidChar(data, **kwargs): - _assertRegexTemplate(r"(?i)invalid in Ascii85", data, **kwargs) - - def assertInvalidLength(data): - _assertRegexTemplate(r"(?i)invalid length", data) + _assertRegexTemplate(r"(?i)Non-Ascii85 digit", data, **kwargs) # Test Ascii85 with missing delimiters assertMissingDelimiter(b"") @@ -331,15 +343,6 @@ def assertInvalidLength(data): assertInvalidChar(b"\tFCb", ignore=b"\n") assertInvalidChar(b"xxxB\nP\thU'D v/F+", ignore=b" \n\tv") - # Test Ascii85 with invalid length of final group (1 mod 5) - assertInvalidLength(b"a") - assertInvalidLength(b"b") - assertInvalidLength(b"zc") - assertInvalidLength(b"zza") - assertInvalidLength(b"!!!!!a") - assertInvalidLength(b"+Error, - "Expected Ascii85 data to end with '~>'"); + "Ascii85 encoded byte sequences must end with b'~>'"); } return NULL; } @@ -782,7 +782,8 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, } else if (!ignore_map[this_ch]) { state = get_binascii_state(module); if (state != NULL) { - PyErr_Format(state->Error, "'%c' invalid in Ascii85", this_ch); + PyErr_Format(state->Error, + "Non-Ascii85 digit found: %c", this_ch); } goto error_end; } @@ -792,16 +793,6 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int fold_spaces, continue; } - /* Treat encoded length of 1 mod 5 as an error. */ - if (ascii_len == -3) { - state = get_binascii_state(module); - if (state != NULL) { - PyErr_SetString(state->Error, - "Ascii85 data has invalid length"); - } - goto error_end; - } - /* Write current chunk. */ chunk_len = ascii_len < 1 ? 
3 + ascii_len : 4; for (Py_ssize_t i = 0; i < chunk_len; i++) { @@ -989,8 +980,10 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, (leftchar *= 85) > UINT32_MAX - this_digit) { state = get_binascii_state(module); if (state != NULL) { - PyErr_SetString(state->Error, - z85 ? "z85 overflow" : "base85 overflow"); + PyErr_Format(state->Error, + "%s overflow in hunk starting at byte %d", + z85 ? "z85" : "base85", + (data->len - ascii_len) / 5 * 5); } goto error_end; } @@ -999,8 +992,8 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, } else if (strict_mode) { state = get_binascii_state(module); if (state != NULL) { - PyErr_Format(state->Error, "'%c' %s", this_ch, - z85 ? "invalid in z85" : "invalid in base85"); + PyErr_Format(state->Error, "bad %s character at position %d", + z85 ? "z85" : "base85", data->len - ascii_len); } goto error_end; } @@ -1010,17 +1003,6 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, int strict_mode, continue; } - /* Treat encoded length of 1 mod 5 as an error. */ - if (ascii_len == -3) { - state = get_binascii_state(module); - if (state != NULL) { - PyErr_Format(state->Error, - z85 ? "z85 data has invalid length" - : "base85 data has invalid length"); - } - goto error_end; - } - /* Write current chunk. */ chunk_len = ascii_len < 1 ? 
3 + ascii_len : 4; for (Py_ssize_t i = 0; i < chunk_len; i++) { From 4072e3bb8139ffa173a7822c465962a884a5e515 Mon Sep 17 00:00:00 2001 From: James Seo Date: Sat, 26 Apr 2025 19:54:51 -0700 Subject: [PATCH 06/12] Add Z85 tests to binascii --- Lib/test/test_binascii.py | 105 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index c9f6c1eff01690..ac458ff8993094 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -531,12 +531,109 @@ def test_base85_newline(self): self.assertEqual(binascii.b2a_base85(b, newline=True), b"bTe}aAO\n") self.assertEqual(binascii.b2a_base85(b, newline=False), b"bTe}aAO") - def test_base85_z85(self): - # Test base85 z85 parameter + def test_base85_z85_valid(self): + # Test base85 (Z85 alphabet) with valid data + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b, z85=True) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + res = bytes() + for line in lines: + a = self.type2test(line) + b = binascii.a2b_base85(a, z85=True) + res += b + self.assertEqual(res, self.rawdata) + + # Test decoding inputs with length 1 mod 5 + self.assertEqual(binascii.a2b_base85(self.type2test(b"a"), z85=True), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b" b "), z85=True), b"") + self.assertEqual(binascii.a2b_base85(self.type2test(b"B y,/;J_n\\c"), z85=True), b"test") + + def test_base85_z85_invalid(self): + # Test base85 (Z85 alphabet) with invalid characters interleaved + lines, i = [], 0 + for k in range(1, len(self.rawdata) + 1): + b = self.type2test(self.rawdata[i:i + k]) + a = binascii.b2a_base85(b, z85=True) + lines.append(a) + i += k + if i >= len(self.rawdata): + break + + fillers = bytearray() + valid = b"0123456789abcdefghijklmnopqrstuvwxyz" \ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" + for i in range(256): 
+ if i not in valid: + fillers.append(i) + def addnoise(line): + res = bytearray() + for i in range(len(line)): + res.append(line[i]) + for j in range(i, len(fillers), len(line)): + res.append(fillers[j]) + return res + res = bytearray() + for line in map(addnoise, lines): + a = self.type2test(line) + b = binascii.a2b_base85(a, z85=True) + res += b + self.assertEqual(res, self.rawdata) + + def test_base85_z85_errors(self): + def _assertRegexTemplate(assert_regex, data, **kwargs): + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base85(self.type2test(data), z85=True, **kwargs) + + def assertOverflow(data): + _assertRegexTemplate(r"(?i)z85 overflow", data) + + # Test base85 (Z85 alphabet) with out-of-range encoded value + assertOverflow(b"%") + assertOverflow(b"%n") + assertOverflow(b"%nS") + assertOverflow(b"%nSc") + assertOverflow(b"%nSc1") + assertOverflow(b"%nSc0$") + assertOverflow(b"%nSc0%nSc0%nSD0") + + def test_base85_z85_pad(self): + # Test base85 (Z85 alphabet) with encode padding + rawdata = b"n1n3Tee\n ch@rAc\te\r$" + for i in range(1, len(rawdata) + 1): + padding = -i % 4 + b = rawdata[:i] + a_pad = binascii.b2a_base85(self.type2test(b), pad=True, z85=True) + b_pad = binascii.a2b_base85(self.type2test(a_pad), z85=True) + b_pad_expected = b + b"\0" * padding + self.assertEqual(b_pad, b_pad_expected) + + def test_base85_z85_strict_mode(self): + # Test base85 (Z85 alphabet) with strict mode on + def assertNonZ85Data(data, expected): + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, r"(?i)bad z85 character"): + binascii.a2b_base85(data, strict_mode=True, z85=True) + default_res = binascii.a2b_base85(data, z85=True) + non_strict_res = binascii.a2b_base85(data, strict_mode=False, z85=True) + self.assertEqual(default_res, non_strict_res) + self.assertEqual(non_strict_res, expected) + + assertNonZ85Data(b"\xda", b"") + assertNonZ85Data(b"00\0\0", b"\0") + assertNonZ85Data(b"z !/", b"ok") + 
assertNonZ85Data(b"By/JnB0hYQ\n", b"tests!!~") + + def test_base85_z85_newline(self): + # Test base85 (Z85 alphabet) newline parameter b = self.type2test(b"t3s\t ") - a = self.type2test(b"BtE$Aao\n") self.assertEqual(binascii.b2a_base85(b, z85=True), b"BtE$Aao\n") - self.assertEqual(binascii.a2b_base85(a, z85=True), b"t3s\t ") + self.assertEqual(binascii.b2a_base85(b, newline=True, z85=True), b"BtE$Aao\n") + self.assertEqual(binascii.b2a_base85(b, newline=False, z85=True), b"BtE$Aao") def test_uu(self): MAX_UU = 45 From bc9217f157b404727651718d55149f693a3c4eba Mon Sep 17 00:00:00 2001 From: James Seo Date: Sat, 26 Apr 2025 21:03:10 -0700 Subject: [PATCH 07/12] Update generated files --- Python/stdlib_module_names.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/stdlib_module_names.h b/Python/stdlib_module_names.h index 26f6272ae9cfbc..b76ef2262d4de2 100644 --- a/Python/stdlib_module_names.h +++ b/Python/stdlib_module_names.h @@ -10,6 +10,7 @@ static const char* _Py_stdlib_module_names[] = { "_ast", "_ast_unparse", "_asyncio", +"_base64", "_bisect", "_blake2", "_bz2", From 2c40ba0971b80843f65df2f0dc02cbb52482352c Mon Sep 17 00:00:00 2001 From: James Seo Date: Sun, 27 Apr 2025 19:55:35 -0700 Subject: [PATCH 08/12] Avoid importing functools Importing update_wrapper() from functools to copy attributes is expensive. Do it ourselves for only the most relevant ones. 
--- Lib/base64.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Lib/base64.py b/Lib/base64.py index 83990b7053ce29..4ad32ad2d229f8 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -580,19 +580,19 @@ def decodebytes(s): try: from _base64 import (_a85encode, _a85decode, _b85encode, _b85decode, _z85encode, _z85decode) - from functools import update_wrapper - update_wrapper(_a85encode, a85encode) - update_wrapper(_a85decode, a85decode) - update_wrapper(_b85encode, b85encode) - update_wrapper(_b85decode, b85decode) - update_wrapper(_z85encode, z85encode) - update_wrapper(_z85decode, z85decode) - a85encode = _a85encode - a85decode = _a85decode - b85encode = _b85encode - b85decode = _b85decode - z85encode = _z85encode - z85decode = _z85decode + # Avoid expensive import of update_wrapper() from functools. + def _copy_attributes(func, src_func): + func.__module__ = src_func.__module__ + func.__name__ = src_func.__name__ + func.__qualname__ = src_func.__qualname__ + func.__doc__ = src_func.__doc__ + return func + a85encode = _copy_attributes(_a85encode, a85encode) + a85decode = _copy_attributes(_a85decode, a85decode) + b85encode = _copy_attributes(_b85encode, b85encode) + b85decode = _copy_attributes(_b85decode, b85decode) + z85encode = _copy_attributes(_z85encode, z85encode) + z85decode = _copy_attributes(_z85decode, z85decode) except ImportError: pass From fd9eaf7ba9aec9d56ca5d0d92e8c3fedf217eb4c Mon Sep 17 00:00:00 2001 From: James Seo Date: Sun, 27 Apr 2025 20:08:10 -0700 Subject: [PATCH 09/12] Avoid circular import in _base64 This requires some code duplication, but oh well. 
--- Lib/_base64.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/Lib/_base64.py b/Lib/_base64.py index d1130ee421ab77..7ff16b0a7d4ac5 100644 --- a/Lib/_base64.py +++ b/Lib/_base64.py @@ -1,10 +1,29 @@ """C accelerator wrappers for originally pure-Python parts of base64.""" from binascii import Error, a2b_ascii85, a2b_base85, b2a_ascii85, b2a_base85 -from base64 import _bytes_from_decode_data, bytes_types -# Base 85 encoder functions in base64 silently convert input to bytes. +# Base 85 functions in base64 silently convert input to bytes. +# Copy the conversion logic from base64 to avoid circular imports. + +bytes_types = (bytes, bytearray) # Types acceptable as binary data + + +def _bytes_from_decode_data(s): + if isinstance(s, str): + try: + return s.encode('ascii') + except UnicodeEncodeError: + raise ValueError('string argument should contain only ASCII characters') + if isinstance(s, bytes_types): + return s + try: + return memoryview(s).tobytes() + except TypeError: + raise TypeError("argument should be a bytes-like object or ASCII " + "string, not %r" % s.__class__.__name__) from None + + def _bytes_from_encode_data(b): return b if isinstance(b, bytes_types) else memoryview(b).tobytes() From 4746d18185d167589a1630d7221e6f9d109f665e Mon Sep 17 00:00:00 2001 From: James Seo Date: Sun, 27 Apr 2025 20:09:41 -0700 Subject: [PATCH 10/12] Do not use a decorator for changing exception type Using a decorator complicates function signature introspection. --- Lib/_base64.py | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/Lib/_base64.py b/Lib/_base64.py index 7ff16b0a7d4ac5..040abc10669de6 100644 --- a/Lib/_base64.py +++ b/Lib/_base64.py @@ -29,48 +29,51 @@ def _bytes_from_encode_data(b): # Functions in binascii raise binascii.Error instead of ValueError. 
-def raise_valueerror(func): - def _func(*args, **kwargs): - try: - return func(*args, **kwargs) - except Error as e: - raise ValueError(e) from None - return _func - -@raise_valueerror def _a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): b = _bytes_from_encode_data(b) - return b2a_ascii85(b, fold_spaces=foldspaces, - wrap=adobe, width=wrapcol, pad=pad) + try: + return b2a_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, width=wrapcol, pad=pad) + except Error as e: + raise ValueError(e) from None -@raise_valueerror def _a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): b = _bytes_from_decode_data(b) - return a2b_ascii85(b, fold_spaces=foldspaces, - wrap=adobe, ignore=ignorechars) - + try: + return a2b_ascii85(b, fold_spaces=foldspaces, + wrap=adobe, ignore=ignorechars) + except Error as e: + raise ValueError(e) from None -@raise_valueerror def _b85encode(b, pad=False): b = _bytes_from_encode_data(b) - return b2a_base85(b, pad=pad, newline=False) + try: + return b2a_base85(b, pad=pad, newline=False) + except Error as e: + raise ValueError(e) from None -@raise_valueerror def _b85decode(b): b = _bytes_from_decode_data(b) - return a2b_base85(b, strict_mode=True) + try: + return a2b_base85(b, strict_mode=True) + except Error as e: + raise ValueError(e) from None -@raise_valueerror def _z85encode(s): s = _bytes_from_encode_data(s) - return b2a_base85(s, newline=False, z85=True) + try: + return b2a_base85(s, newline=False, z85=True) + except Error as e: + raise ValueError(e) from None -@raise_valueerror def _z85decode(s): s = _bytes_from_decode_data(s) - return a2b_base85(s, strict_mode=True, z85=True) + try: + return a2b_base85(s, strict_mode=True, z85=True) + except Error as e: + raise ValueError(e) from None From d075593f0b65af3fece22382f326fd07f072986c Mon Sep 17 00:00:00 2001 From: James Seo Date: Sun, 27 Apr 2025 20:39:18 -0700 Subject: [PATCH 11/12] Test Python and C codepaths in base64 using mixins Do we really need to 
test the legacy API twice? --- Lib/test/test_base64.py | 92 ++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 0f7cf82a858b78..6282da6b6bc8f0 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -2,39 +2,15 @@ import binascii import os from array import array -from functools import update_wrapper from test.support import os_helper from test.support import script_helper from test.support.import_helper import import_fresh_module -base64 = import_fresh_module("base64", blocked=["_base64"]) +py_base64 = import_fresh_module("base64", blocked=["_base64"]) c_base64 = import_fresh_module("base64", fresh=["_base64"]) -def with_c_implementation(test_func): - if c_base64 is None: - return test_func - - def _test_func(self): - global base64 - - # Test Python implementation - test_func(self) - - # Test C implementation - base64_ = base64 - try: - base64 = c_base64 - test_func(self) - finally: - base64 = base64_ - - update_wrapper(_test_func, test_func) - - return _test_func - - -class LegacyBase64TestCase(unittest.TestCase): +class LegacyBase64TestCase: # Legacy API is not as permissive as the modern API def check_type_errors(self, f): @@ -46,6 +22,7 @@ def check_type_errors(self, f): self.assertRaises(TypeError, f, int_data) def test_encodebytes(self): + base64 = self.module eq = self.assertEqual eq(base64.encodebytes(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=\n") eq(base64.encodebytes(b"a"), b"YQ==\n") @@ -67,6 +44,7 @@ def test_encodebytes(self): self.check_type_errors(base64.encodebytes) def test_decodebytes(self): + base64 = self.module eq = self.assertEqual eq(base64.decodebytes(b"d3d3LnB5dGhvbi5vcmc=\n"), b"www.python.org") eq(base64.decodebytes(b"YQ==\n"), b"a") @@ -88,6 +66,7 @@ def test_decodebytes(self): self.check_type_errors(base64.decodebytes) def test_encode(self): + base64 = self.module eq = self.assertEqual from io import BytesIO, 
StringIO infp = BytesIO(b'abcdefghijklmnopqrstuvwxyz' @@ -105,6 +84,7 @@ def test_encode(self): self.assertRaises(TypeError, base64.encode, StringIO('abc'), StringIO()) def test_decode(self): + base64 = self.module from io import BytesIO, StringIO infp = BytesIO(b'd3d3LnB5dGhvbi5vcmc=') outfp = BytesIO() @@ -116,7 +96,16 @@ def test_decode(self): self.assertRaises(TypeError, base64.encode, StringIO('YWJj\n'), StringIO()) -class BaseXYTestCase(unittest.TestCase): +class LegacyBase64TestCasePython(LegacyBase64TestCase, unittest.TestCase): + module = py_base64 + + +@unittest.skipUnless(c_base64, "requires _base64") +class LegacyBase64TestCaseC(LegacyBase64TestCase, unittest.TestCase): + module = c_base64 + + +class BaseXYTestCase: # Modern API completely ignores exported dimension and format data and # treats any buffer as a stream of bytes @@ -128,6 +117,7 @@ def check_decode_type_errors(self, f): self.assertRaises(TypeError, f, []) def check_other_types(self, f, bytes_data, expected): + base64 = self.module eq = self.assertEqual b = bytearray(bytes_data) eq(f(b), expected) @@ -154,6 +144,7 @@ def check_nonbyte_element_format(self, f, data): def test_b64encode(self): + base64 = self.module eq = self.assertEqual # Test default alphabet eq(base64.b64encode(b"www.python.org"), b"d3d3LnB5dGhvbi5vcmc=") @@ -204,6 +195,7 @@ def test_b64encode(self): self.check_encode_type_errors(base64.urlsafe_b64encode) def test_b64decode(self): + base64 = self.module eq = self.assertEqual tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org", @@ -260,10 +252,12 @@ def test_b64decode(self): self.check_decode_type_errors(base64.urlsafe_b64decode) def test_b64decode_padding_error(self): + base64 = self.module self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') def test_b64decode_invalid_chars(self): + base64 = self.module # issue 1466065: Test some invalid characters. 
tests = ((b'%3d==', b'\xdd'), (b'$3d==', b'\xdd'), @@ -296,6 +290,7 @@ def test_b64decode_invalid_chars(self): self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) def test_b32encode(self): + base64 = self.module eq = self.assertEqual eq(base64.b32encode(b''), b'') eq(base64.b32encode(b'\x00'), b'AA======') @@ -309,6 +304,7 @@ def test_b32encode(self): self.check_encode_type_errors(base64.b32encode) def test_b32decode(self): + base64 = self.module eq = self.assertEqual tests = {b'': b'', b'AA======': b'\x00', @@ -326,6 +322,7 @@ def test_b32decode(self): self.check_decode_type_errors(base64.b32decode) def test_b32decode_casefold(self): + base64 = self.module eq = self.assertEqual tests = {b'': b'', b'ME======': b'a', @@ -367,6 +364,7 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, data_str) def test_b32decode_error(self): + base64 = self.module tests = [b'abc', b'ABCDEF==', b'==ABCDEF'] prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] for i in range(0, 17): @@ -383,6 +381,7 @@ def test_b32decode_error(self): base64.b32decode(data.decode('ascii')) def test_b32hexencode(self): + base64 = self.module test_cases = [ # to_encode, expected (b'', b''), @@ -398,10 +397,12 @@ def test_b32hexencode(self): self.assertEqual(base64.b32hexencode(to_encode), expected) def test_b32hexencode_other_types(self): + base64 = self.module self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=') self.check_encode_type_errors(base64.b32hexencode) def test_b32hexdecode(self): + base64 = self.module test_cases = [ # to_decode, expected, casefold (b'', b'', False), @@ -432,10 +433,12 @@ def test_b32hexdecode(self): casefold), expected) def test_b32hexdecode_other_types(self): + base64 = self.module self.check_other_types(base64.b32hexdecode, b'C5H66===', b'abc') self.check_decode_type_errors(base64.b32hexdecode) def test_b32hexdecode_error(self): + base64 = self.module tests = [b'abc', b'ABCDEF==', b'==ABCDEF', 
b'c4======'] prefixes = [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF'] for i in range(0, 17): @@ -453,6 +456,7 @@ def test_b32hexdecode_error(self): def test_b16encode(self): + base64 = self.module eq = self.assertEqual eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF') eq(base64.b16encode(b'\x00'), b'00') @@ -462,6 +466,7 @@ def test_b16encode(self): self.check_encode_type_errors(base64.b16encode) def test_b16decode(self): + base64 = self.module eq = self.assertEqual eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef') eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef') @@ -488,8 +493,8 @@ def test_b16decode(self): # Incorrect "padding" self.assertRaises(binascii.Error, base64.b16decode, '010') - @with_c_implementation def test_a85encode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -539,8 +544,8 @@ def test_a85encode(self): eq(base64.a85encode(b' '*6, foldspaces=True, adobe=False), b'y+', b"www.python.org") - @with_c_implementation def test_b85decode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -692,8 +697,8 @@ def test_b85decode(self): self.check_other_types(base64.b85decode, b'cXxL#aCvlSZ*DGca%T', b"www.python.org") - @with_c_implementation def test_z85decode(self): + base64 = self.module eq = self.assertEqual tests = { @@ -728,8 +733,8 @@ def test_z85decode(self): self.check_other_types(base64.z85decode, b'CxXl-AcVLsz/dgCA+t', b'www.python.org') - @with_c_implementation def test_a85_padding(self): + base64 = self.module eq = self.assertEqual eq(base64.a85encode(b"x", pad=True), b'GQ7^D') @@ -744,8 +749,8 @@ def test_a85_padding(self): eq(base64.a85decode(b'G^+IX'), b"xxxx") eq(base64.a85decode(b'G^+IXGQ7^D'), b"xxxxx\x00\x00\x00") - @with_c_implementation def test_b85_padding(self): + base64 = self.module eq = self.assertEqual eq(base64.b85encode(b"x", pad=True), b'cmMzZ') @@ -760,8 +765,8 @@ def test_b85_padding(self): eq(base64.b85decode(b'czAet'), b"xxxx") 
eq(base64.b85decode(b'czAetcmMzZ'), b"xxxxx\x00\x00\x00") - @with_c_implementation def test_a85decode_errors(self): + base64 = self.module illegal = (set(range(32)) | set(range(118, 256))) - set(b' \t\n\r\v') for c in illegal: with self.assertRaises(ValueError, msg=bytes([c])): @@ -798,8 +803,8 @@ def test_a85decode_errors(self): self.assertRaises(ValueError, base64.a85decode, b'aaaay', foldspaces=True) - @with_c_implementation def test_b85decode_errors(self): + base64 = self.module illegal = list(range(33)) + \ list(b'"\',./:[\\]') + \ list(range(128, 256)) @@ -813,8 +818,8 @@ def test_b85decode_errors(self): self.assertRaises(ValueError, base64.b85decode, b'|NsC') self.assertRaises(ValueError, base64.b85decode, b'|NsC1') - @with_c_implementation def test_z85decode_errors(self): + base64 = self.module illegal = list(range(33)) + \ list(b'"\',;_`|\\~') + \ list(range(128, 256)) @@ -830,6 +835,7 @@ def test_z85decode_errors(self): self.assertRaises(ValueError, base64.z85decode, b'%nSc1') def test_decode_nonascii_str(self): + base64 = self.module decode_funcs = (base64.b64decode, base64.standard_b64decode, base64.urlsafe_b64decode, @@ -845,6 +851,7 @@ def test_ErrorHeritage(self): self.assertTrue(issubclass(binascii.Error, ValueError)) def test_RFC4648_test_cases(self): + base64 = self.module # test cases from RFC 4648 section 10 b64encode = base64.b64encode b32hexencode = base64.b32hexencode @@ -884,6 +891,15 @@ def test_RFC4648_test_cases(self): self.assertEqual(b16encode(b"foobar"), b"666F6F626172") +class BaseXYTestCasePython(BaseXYTestCase, unittest.TestCase): + module = py_base64 + + +@unittest.skipUnless(c_base64, "requires _base64") +class BaseXYTestCaseC(BaseXYTestCase, unittest.TestCase): + module = c_base64 + + class TestMain(unittest.TestCase): def tearDown(self): if os.path.exists(os_helper.TESTFN): From 6d65fec28f0d7b84b9f2c7f4a73d788bbd327500 Mon Sep 17 00:00:00 2001 From: James Seo Date: Mon, 28 Apr 2025 20:50:21 -0700 Subject: [PATCH 12/12] Remove 
leading underscore from functions in private module --- Lib/_base64.py | 12 ++++++------ Lib/base64.py | 9 ++++----- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Lib/_base64.py b/Lib/_base64.py index 040abc10669de6..d48577f36bb8ae 100644 --- a/Lib/_base64.py +++ b/Lib/_base64.py @@ -30,7 +30,7 @@ def _bytes_from_encode_data(b): # Functions in binascii raise binascii.Error instead of ValueError. -def _a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): +def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): b = _bytes_from_encode_data(b) try: return b2a_ascii85(b, fold_spaces=foldspaces, @@ -39,7 +39,7 @@ def _a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): raise ValueError(e) from None -def _a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): b = _bytes_from_decode_data(b) try: return a2b_ascii85(b, fold_spaces=foldspaces, @@ -47,7 +47,7 @@ def _a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): except Error as e: raise ValueError(e) from None -def _b85encode(b, pad=False): +def b85encode(b, pad=False): b = _bytes_from_encode_data(b) try: return b2a_base85(b, pad=pad, newline=False) @@ -55,7 +55,7 @@ def _b85encode(b, pad=False): raise ValueError(e) from None -def _b85decode(b): +def b85decode(b): b = _bytes_from_decode_data(b) try: return a2b_base85(b, strict_mode=True) @@ -63,7 +63,7 @@ def _b85decode(b): raise ValueError(e) from None -def _z85encode(s): +def z85encode(s): s = _bytes_from_encode_data(s) try: return b2a_base85(s, newline=False, z85=True) @@ -71,7 +71,7 @@ def _z85encode(s): raise ValueError(e) from None -def _z85decode(s): +def z85decode(s): s = _bytes_from_decode_data(s) try: return a2b_base85(s, strict_mode=True, z85=True) diff --git a/Lib/base64.py b/Lib/base64.py index 4ad32ad2d229f8..602b890dec3010 100644 --- a/Lib/base64.py +++ b/Lib/base64.py 
@@ -578,14 +578,13 @@ def decodebytes(s): # Use accelerated implementations of originally pure-Python parts if possible. try: - from _base64 import (_a85encode, _a85decode, _b85encode, - _b85decode, _z85encode, _z85decode) + from _base64 import (a85encode as _a85encode, a85decode as _a85decode, + b85encode as _b85encode, b85decode as _b85decode, + z85encode as _z85encode, z85decode as _z85decode) # Avoid expensive import of update_wrapper() from functools. def _copy_attributes(func, src_func): - func.__module__ = src_func.__module__ - func.__name__ = src_func.__name__ - func.__qualname__ = src_func.__qualname__ func.__doc__ = src_func.__doc__ + func.__module__ = "base64" return func a85encode = _copy_attributes(_a85encode, a85encode) a85decode = _copy_attributes(_a85decode, a85decode)