[Python-checkins] gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267)

serhiy-storchaka Sun, 22 Mar 2026 14:13:31 -0700

https://github.com/python/cpython/commit/4561f6418a691b3e89aef0901f53fe0dfb7f7c0e
commit: 4561f6418a691b3e89aef0901f53fe0dfb7f7c0e
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-03-22T23:12:58+02:00
summary:


gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267)

Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc)
no longer ignores excess data after the first padded quad in non-strict
(default) mode.  Instead, in conformance with RFC 4648, it ignores the
pad character, "=", if it is present before the end of the encoded data.

files:
A Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
M Lib/test/test_binascii.py
M Modules/binascii.c

diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index d4879667c71461..1dcd2b25c79087 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -274,23 +274,21 @@ def assertNonBase64Data(data, expected, ignorechars):
 
     def test_base64_excess_data(self):
         # Test excess data exceptions
-        def assertExcessData(data, non_strict_expected,
-                             ignore_padchar_expected=None):
+        def assertExcessData(data, expected):
             assert_regex = r'(?i)Excess data'
             data = self.type2test(data)
             with self.assertRaisesRegex(binascii.Error, assert_regex):
                 binascii.a2b_base64(data, strict_mode=True)
             self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
-                             non_strict_expected)
-            if ignore_padchar_expected is not None:
-                self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
-                                                     ignorechars=b'='),
-                                 ignore_padchar_expected)
-            self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
-
-        assertExcessData(b'ab==c', b'i')
-        assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
-        assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+                                                 ignorechars=b'='),
+                             expected)
+            self.assertEqual(binascii.a2b_base64(data), expected)
+
+        assertExcessData(b'ab==c=', b'i\xb7')
+        assertExcessData(b'ab==cd', b'i\xb7\x1d')
+        assertExcessData(b'abc=d', b'i\xb7\x1d')
 
     def test_base64errors(self):
         # Test base64 with invalid padding
diff --git a/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
new file mode 100644
index 00000000000000..22d53fe8db1123
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
@@ -0,0 +1,4 @@
+Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no
+longer ignores excess data after the first padded quad in non-strict
+(default) mode.  Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
+the pad character, "=", if it is present before the end of the encoded data.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 7907b74e36f085..a57bf3ee6339f5 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -800,40 +800,33 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
         */
         if (this_ch == BASE64_PAD) {
             pads++;
-
-            if (strict_mode) {
-                if (quad_pos >= 2 && quad_pos + pads <= 4) {
-                    continue;
-                }
-                if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
-                    continue;
-                }
-                if (quad_pos == 1) {
-                    /* Set an error below. */
-                    break;
-                }
-                state = get_binascii_state(module);
-                if (state) {
-                    PyErr_SetString(state->Error,
-                                    (quad_pos == 0 && ascii_data == data->buf)
-                                    ? "Leading padding not allowed"
-                                    : "Excess padding not allowed");
-                }
-                goto error_end;
+            if (quad_pos >= 2 && quad_pos + pads <= 4) {
+                continue;
             }
-            else {
-                if (quad_pos >= 2 && quad_pos + pads >= 4) {
-                    /* A pad sequence means we should not parse more input.
-                    ** We've already interpreted the data from the quad at this point.
-                    */
-                    goto done;
-                }
+            // See RFC 4648, section-3.3: "specifications MAY ignore the
+            // pad character, "=", treating it as non-alphabet data, if
+            // it is present before the end of the encoded data" and
+            // "the excess pad characters MAY also be ignored."
+            if (!strict_mode || ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
                 continue;
             }
+            if (quad_pos == 1) {
+                /* Set an error below. */
+                break;
+            }
+            state = get_binascii_state(module);
+            if (state) {
+                PyErr_SetString(state->Error,
+                                (quad_pos == 0 && ascii_data == data->buf)
+                                ? "Leading padding not allowed"
+                                : "Excess padding not allowed");
+            }
+            goto error_end;
         }
 
         unsigned char v = table_a2b[this_ch];
         if (v >= 64) {
+            // See RFC 4648, section-3.3.
             if (strict_mode && !ignorechar(this_ch, ignorechars, ignorecache)) {
                 state = get_binascii_state(module);
                 if (state) {
@@ -844,7 +837,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
             continue;
         }
 
-        // Characters that are not '=', in the middle of the padding, are not allowed
+        // Characters that are not '=', in the middle of the padding, are
+        // not allowed (except when they are). See RFC 4648, section-3.3.
         if (pads && strict_mode &&
             !ignorechar(BASE64_PAD, ignorechars, ignorecache))
         {
@@ -908,7 +902,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
         goto error_end;
     }
 
-done:
     Py_XDECREF(table_obj);
     return PyBytesWriter_FinishWithPointer(writer, bin_data);
 

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

[Python-checkins] gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267)

Reply via email to