https://github.com/python/cpython/commit/4561f6418a691b3e89aef0901f53fe0dfb7f7c0e
commit: 4561f6418a691b3e89aef0901f53fe0dfb7f7c0e
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-03-22T23:12:58+02:00
summary:
gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267)
Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc)
no longer ignores excess data after the first padded quad in non-strict
(default) mode. Instead, in conformance with RFC 4648, it ignores the
pad character, "=", if it is present before the end of the encoded data.
files:
A Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
M Lib/test/test_binascii.py
M Modules/binascii.c
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index d4879667c71461..1dcd2b25c79087 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -274,23 +274,21 @@ def assertNonBase64Data(data, expected, ignorechars):
def test_base64_excess_data(self):
# Test excess data exceptions
- def assertExcessData(data, non_strict_expected,
- ignore_padchar_expected=None):
+ def assertExcessData(data, expected):
assert_regex = r'(?i)Excess data'
data = self.type2test(data)
with self.assertRaisesRegex(binascii.Error, assert_regex):
binascii.a2b_base64(data, strict_mode=True)
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
- non_strict_expected)
- if ignore_padchar_expected is not None:
- self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
- ignorechars=b'='),
- ignore_padchar_expected)
- self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
-
- assertExcessData(b'ab==c', b'i')
- assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
- assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
+ expected)
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+ ignorechars=b'='),
+ expected)
+ self.assertEqual(binascii.a2b_base64(data), expected)
+
+ assertExcessData(b'ab==c=', b'i\xb7')
+ assertExcessData(b'ab==cd', b'i\xb7\x1d')
+ assertExcessData(b'abc=d', b'i\xb7\x1d')
def test_base64errors(self):
# Test base64 with invalid padding
diff --git a/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
new file mode 100644
index 00000000000000..22d53fe8db1123
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
@@ -0,0 +1,4 @@
+Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no
+longer ignores excess data after the first padded quad in non-strict
+(default) mode. Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
+the pad character, "=", if it is present before the end of the encoded data.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 7907b74e36f085..a57bf3ee6339f5 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -800,40 +800,33 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
*/
if (this_ch == BASE64_PAD) {
pads++;
-
- if (strict_mode) {
- if (quad_pos >= 2 && quad_pos + pads <= 4) {
- continue;
- }
- if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
- continue;
- }
- if (quad_pos == 1) {
- /* Set an error below. */
- break;
- }
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error,
- (quad_pos == 0 && ascii_data == data->buf)
- ? "Leading padding not allowed"
- : "Excess padding not allowed");
- }
- goto error_end;
+ if (quad_pos >= 2 && quad_pos + pads <= 4) {
+ continue;
}
- else {
- if (quad_pos >= 2 && quad_pos + pads >= 4) {
- /* A pad sequence means we should not parse more input.
- ** We've already interpreted the data from the quad at this point.
- */
- goto done;
- }
+ // See RFC 4648, section-3.3: "specifications MAY ignore the
+ // pad character, "=", treating it as non-alphabet data, if
+ // it is present before the end of the encoded data" and
+ // "the excess pad characters MAY also be ignored."
+ if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
continue;
}
+ if (quad_pos == 1) {
+ /* Set an error below. */
+ break;
+ }
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error,
+ (quad_pos == 0 && ascii_data == data->buf)
+ ? "Leading padding not allowed"
+ : "Excess padding not allowed");
+ }
+ goto error_end;
}
unsigned char v = table_a2b[this_ch];
if (v >= 64) {
+ // See RFC 4648, section-3.3.
if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache))
{
state = get_binascii_state(module);
if (state) {
@@ -844,7 +837,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
continue;
}
- // Characters that are not '=', in the middle of the padding, are not allowed
+ // Characters that are not '=', in the middle of the padding, are
+ // not allowed (except when they are). See RFC 4648, section-3.3.
if (pads && strict_mode &&
!ignorechar(BASE64_PAD, ignorechars, ignorecache))
{
@@ -908,7 +902,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
goto error_end;
}
-done:
Py_XDECREF(table_obj);
return PyBytesWriter_FinishWithPointer(writer, bin_data);
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]