https://github.com/python/cpython/commit/4644fed8190e4646663605f3e824f0767a0d026d
commit: 4644fed8190e4646663605f3e824f0767a0d026d
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-02-05T21:14:49+02:00
summary:

gh-144001: Support ignoring the invalid pad character in Base64 decoding (GH-144306)

files:
M Doc/library/base64.rst
M Doc/library/binascii.rst
M Lib/test/test_base64.py
M Lib/test/test_binascii.py
M Modules/binascii.c

diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 478686bc30035c..554d6e7d04ded2 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -87,6 +87,9 @@ POST request.
 
    If *ignorechars* is specified, it should be a :term:`bytes-like object`
    containing characters to ignore from the input when *validate* is true.
+   If *ignorechars* contains the pad character ``'='``,  the pad characters
+   presented before the end of the encoded data and the excess pad characters
+   will be ignored.
    The default value of *validate* is ``True`` if *ignorechars* is specified,
    ``False`` otherwise.
 
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index d9f0baedec85f2..eb801175ee6179 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -56,6 +56,9 @@ The :mod:`binascii` module defines the following functions:
 
    If *ignorechars* is specified, it should be a :term:`bytes-like object`
    containing characters to ignore from the input when *strict_mode* is true.
+   If *ignorechars* contains the pad character ``'='``,  the pad characters
+   presented before the end of the encoded data and the excess pad characters
+   will be ignored.
    The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
    ``False`` otherwise.
 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 0f947409f0694b..fef18a1b757c08 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -306,7 +306,7 @@ def test_b64decode_invalid_chars(self):
         # issue 1466065: Test some invalid characters.
         tests = ((b'%3d==', b'\xdd', b'%$'),
                  (b'$3d==', b'\xdd', b'%$'),
-                 (b'[==', b'', None),
+                 (b'[==', b'', b'[='),
                  (b'YW]3=', b'am', b']'),
                  (b'3{d==', b'\xdd', b'{}'),
                  (b'3d}==', b'\xdd', b'{}'),
@@ -314,6 +314,12 @@ def test_b64decode_invalid_chars(self):
                  (b'!', b'', b'@!'),
                  (b"YWJj\n", b"abc", b'\n'),
                  (b'YWJj\nYWI=', b'abcab', b'\n'),
+                 (b'=YWJj', b'abc', b'='),
+                 (b'Y=WJj', b'abc', b'='),
+                 (b'Y==WJj', b'abc', b'='),
+                 (b'Y===WJj', b'abc', b'='),
+                 (b'YW=Jj', b'abc', b'='),
+                 (b'YWJj=', b'abc', b'='),
                  (b'YW\nJj', b'abc', b'\n'),
                  (b'YW\nJj', b'abc', bytearray(b'\n')),
                  (b'YW\nJj', b'abc', memoryview(b'\n')),
@@ -335,9 +341,8 @@ def test_b64decode_invalid_chars(self):
             with self.assertRaises(binascii.Error):
                 # Even empty ignorechars enables the strict mode.
                 base64.b64decode(bstr, ignorechars=b'')
-            if ignorechars is not None:
-                r = base64.b64decode(bstr, ignorechars=ignorechars)
-                self.assertEqual(r, res)
+            r = base64.b64decode(bstr, ignorechars=ignorechars)
+            self.assertEqual(r, res)
 
         with self.assertRaises(TypeError):
             base64.b64decode(b'', ignorechars='')
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 49accb08b62e40..a4928794e0acfb 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -118,66 +118,78 @@ def addnoise(line):
         # empty strings. TBD: shouldn't it raise an exception instead ?
         self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
 
-    def test_base64_strict_mode(self):
-        # Test base64 with strict mode on
-        def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes):
+    def test_base64_bad_padding(self):
+        # Test malformed padding
+        def _assertRegexTemplate(assert_regex, data,
+                                 non_strict_mode_expected_result):
+            data = self.type2test(data)
             with self.assertRaisesRegex(binascii.Error, assert_regex):
-                binascii.a2b_base64(self.type2test(data), strict_mode=True)
-            self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False),
+                binascii.a2b_base64(data, strict_mode=True)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
                              non_strict_mode_expected_result)
-            self.assertEqual(binascii.a2b_base64(self.type2test(data)),
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+                                                 ignorechars=b'='),
+                             non_strict_mode_expected_result)
+            self.assertEqual(binascii.a2b_base64(data),
                              non_strict_mode_expected_result)
 
-        def assertExcessData(data, non_strict_mode_expected_result: bytes):
-            _assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result)
-
-        def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
-            _assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result)
+        def assertLeadingPadding(*args):
+            _assertRegexTemplate(r'(?i)Leading padding', *args)
 
-        def assertLeadingPadding(data, non_strict_mode_expected_result: bytes):
-            _assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result)
+        def assertDiscontinuousPadding(*args):
+            _assertRegexTemplate(r'(?i)Discontinuous padding', *args)
 
-        def assertDiscontinuousPadding(data, non_strict_mode_expected_result: bytes):
-            _assertRegexTemplate(r'(?i)Discontinuous padding', data, non_strict_mode_expected_result)
+        def assertExcessPadding(*args):
+            _assertRegexTemplate(r'(?i)Excess padding', *args)
 
-        def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
-            _assertRegexTemplate(r'(?i)Excess padding', data, non_strict_mode_expected_result)
+        def assertInvalidLength(*args):
+            _assertRegexTemplate(r'(?i)Invalid.+number of data characters', *args)
 
-        # Test excess data exceptions
-        assertExcessData(b'ab==a', b'i')
         assertExcessPadding(b'ab===', b'i')
         assertExcessPadding(b'ab====', b'i')
-        assertNonBase64Data(b'ab==:', b'i')
-        assertExcessData(b'abc=a', b'i\xb7')
-        assertNonBase64Data(b'abc=:', b'i\xb7')
-        assertNonBase64Data(b'ab==\n', b'i')
         assertExcessPadding(b'abc==', b'i\xb7')
         assertExcessPadding(b'abc===', b'i\xb7')
         assertExcessPadding(b'abc====', b'i\xb7')
         assertExcessPadding(b'abc=====', b'i\xb7')
 
-        # Test non-base64 data exceptions
-        assertNonBase64Data(b'\nab==', b'i')
-        assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
-        assertNonBase64Data(b'a\nb==', b'i')
-        assertNonBase64Data(b'a\x00b==', b'i')
-
-        # Test malformed padding
         assertLeadingPadding(b'=', b'')
         assertLeadingPadding(b'==', b'')
         assertLeadingPadding(b'===', b'')
         assertLeadingPadding(b'====', b'')
         assertLeadingPadding(b'=====', b'')
+        assertLeadingPadding(b'=abcd', b'i\xb7\x1d')
+        assertLeadingPadding(b'==abcd', b'i\xb7\x1d')
+        assertLeadingPadding(b'===abcd', b'i\xb7\x1d')
+        assertLeadingPadding(b'====abcd', b'i\xb7\x1d')
+        assertLeadingPadding(b'=====abcd', b'i\xb7\x1d')
+
+        assertInvalidLength(b'a=b==', b'i')
+        assertInvalidLength(b'a=bc=', b'i\xb7')
+        assertInvalidLength(b'a=bc==', b'i\xb7')
+        assertInvalidLength(b'a=bcd', b'i\xb7\x1d')
+        assertInvalidLength(b'a=bcd=', b'i\xb7\x1d')
+
         assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
-        assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
-        assertNonBase64Data(b'ab=:=', b'i')
+        assertDiscontinuousPadding(b'ab=cd', b'i\xb7\x1d')
+        assertDiscontinuousPadding(b'ab=cd==', b'i\xb7\x1d')
+
         assertExcessPadding(b'abcd=', b'i\xb7\x1d')
         assertExcessPadding(b'abcd==', b'i\xb7\x1d')
         assertExcessPadding(b'abcd===', b'i\xb7\x1d')
         assertExcessPadding(b'abcd====', b'i\xb7\x1d')
         assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd==', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd===', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd====', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+        assertExcessPadding(b'abcd=efgh', b'i\xb7\x1dy\xf8!')
+        assertExcessPadding(b'abcd==efgh', b'i\xb7\x1dy\xf8!')
+        assertExcessPadding(b'abcd===efgh', b'i\xb7\x1dy\xf8!')
+        assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!')
+        assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!')
 
     def test_base64_invalidchars(self):
+        # Test non-base64 data exceptions
         def assertNonBase64Data(data, expected, ignorechars):
             data = self.type2test(data)
             assert_regex = r'(?i)Only base64 data'
@@ -195,10 +207,11 @@ def assertNonBase64Data(data, expected, ignorechars):
         assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
         assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
         assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
+        assertNonBase64Data(b'ab:==', b'i', ignorechars=b':')
+        assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
         assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
         assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
         assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
-        assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
         assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
         assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
 
@@ -221,11 +234,37 @@ def assertNonBase64Data(data, expected, ignorechars):
         with self.assertRaises(TypeError):
             binascii.a2b_base64(data, ignorechars=None)
 
+    def test_base64_excess_data(self):
+        # Test excess data exceptions
+        def assertExcessData(data, non_strict_expected,
+                             ignore_padchar_expected=None):
+            assert_regex = r'(?i)Excess data'
+            data = self.type2test(data)
+            with self.assertRaisesRegex(binascii.Error, assert_regex):
+                binascii.a2b_base64(data, strict_mode=True)
+            self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
+                             non_strict_expected)
+            if ignore_padchar_expected is not None:
+                self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+                                                     ignorechars=b'='),
+                                 ignore_padchar_expected)
+            self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
+
+        assertExcessData(b'ab==c', b'i')
+        assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
+        assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
+
     def test_base64errors(self):
         # Test base64 with invalid padding
-        def assertIncorrectPadding(data):
+        def assertIncorrectPadding(data, strict_mode=True):
+            data = self.type2test(data)
             with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
-                binascii.a2b_base64(self.type2test(data))
+                binascii.a2b_base64(data)
+            with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
+                binascii.a2b_base64(data, strict_mode=False)
+            if strict_mode:
+                with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'):
+                    binascii.a2b_base64(data, strict_mode=True)
 
         assertIncorrectPadding(b'ab')
         assertIncorrectPadding(b'ab=')
@@ -233,16 +272,22 @@ def assertIncorrectPadding(data):
         assertIncorrectPadding(b'abcdef')
         assertIncorrectPadding(b'abcdef=')
         assertIncorrectPadding(b'abcdefg')
-        assertIncorrectPadding(b'a=b=')
-        assertIncorrectPadding(b'a\nb=')
+        assertIncorrectPadding(b'a=b=', strict_mode=False)
+        assertIncorrectPadding(b'a\nb=', strict_mode=False)
 
         # Test base64 with invalid number of valid characters (1 mod 4)
-        def assertInvalidLength(data):
+        def assertInvalidLength(data, strict_mode=True):
             n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', br'', data))
+            data = self.type2test(data)
             expected_errmsg_re = \
                 r'(?i)Invalid.+number of data characters.+' + str(n_data_chars)
             with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
-                binascii.a2b_base64(self.type2test(data))
+                binascii.a2b_base64(data)
+            with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
+                binascii.a2b_base64(data, strict_mode=False)
+            if strict_mode:
+                with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
+                    binascii.a2b_base64(data, strict_mode=True)
 
         assertInvalidLength(b'a')
         assertInvalidLength(b'a=')
@@ -250,7 +295,8 @@ def assertInvalidLength(data):
         assertInvalidLength(b'a===')
         assertInvalidLength(b'a' * 5)
         assertInvalidLength(b'a' * (4 * 87 + 1))
-        assertInvalidLength(b'A\tB\nC ??DE')  # only 5 valid characters
+        assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters
+                            strict_mode=False)
 
     def test_uu(self):
         MAX_UU = 45
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 201e7798bb7a8c..6d3d4e1a6d6daa 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -564,26 +564,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
             pads++;
 
             if (strict_mode) {
-                if (quad_pos == 0) {
-                    state = get_binascii_state(module);
-                    if (state) {
-                        PyErr_SetString(state->Error, (ascii_data == data->buf)
-                            ? "Leading padding not allowed"
-                            : "Excess padding not allowed");
-                    }
-                    goto error_end;
+                if (quad_pos >= 2 && quad_pos + pads <= 4) {
+                    continue;
+                }
+                if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
+                    continue;
                 }
                 if (quad_pos == 1) {
                     /* Set an error below. */
                     break;
                 }
-                if (quad_pos + pads > 4) {
-                    state = get_binascii_state(module);
-                    if (state) {
-                        PyErr_SetString(state->Error, "Excess padding not allowed");
-                    }
-                    goto error_end;
+                state = get_binascii_state(module);
+                if (state) {
+                    PyErr_SetString(state->Error,
+                                    (quad_pos == 0 && ascii_data == data->buf)
+                                    ? "Leading padding not allowed"
+                                    : "Excess padding not allowed");
                 }
+                goto error_end;
             }
             else {
                 if (quad_pos >= 2 && quad_pos + pads >= 4) {
@@ -592,8 +590,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
                     */
                     goto done;
                 }
+                continue;
             }
-            continue;
         }
 
         unsigned char v = table_a2b_base64[this_ch];
@@ -609,7 +607,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
         }
 
         // Characters that are not '=', in the middle of the padding, are not allowed
-        if (strict_mode && pads) {
+        if (pads && strict_mode &&
+            !ignorechar(BASE64_PAD, ignorechars, ignorecache))
+        {
             state = get_binascii_state(module);
             if (state) {
                 PyErr_SetString(state->Error, (quad_pos + pads == 4)
@@ -662,7 +662,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode,
         goto error_end;
     }
 
-    if (quad_pos != 0 && quad_pos + pads != 4) {
+    if (quad_pos != 0 && quad_pos + pads < 4) {
         state = get_binascii_state(module);
         if (state) {
             PyErr_SetString(state->Error, "Incorrect padding");

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to