https://github.com/python/cpython/commit/e31c55121620189a0d1a07b689762d8ca9c1b7fa
commit: e31c55121620189a0d1a07b689762d8ca9c1b7fa
branch: 3.14
author: Serhiy Storchaka <[email protected]>
committer: ambv <[email protected]>
date: 2026-03-24T00:20:26+01:00
summary:
[3.14] gh-145264: Do not ignore excess Base64 data after the first padded quad
(GH-145267) (GH-146326)
Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc)
no longer ignores excess data after the first padded quad in non-strict
(default) mode. Instead, in conformance with RFC 4648, it ignores the
pad character, "=", if it is present before the end of the encoded data.
(cherry picked from commit 4561f6418a691b3e89aef0901f53fe0dfb7f7c0e)
files:
A Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
M Lib/test/test_binascii.py
M Modules/binascii.c
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 7ed7d7c47b6de1..c04ab1e2a5eeb8 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -143,17 +143,16 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
_assertRegexTemplate(r'(?i)Excess padding', data,
non_strict_mode_expected_result)
# Test excess data exceptions
- assertExcessData(b'ab==a', b'i')
- assertExcessData(b'ab===', b'i')
- assertExcessData(b'ab====', b'i')
- assertExcessData(b'ab==:', b'i')
- assertExcessData(b'abc=a', b'i\xb7')
- assertExcessData(b'abc=:', b'i\xb7')
- assertExcessData(b'ab==\n', b'i')
- assertExcessData(b'abc==', b'i\xb7')
- assertExcessData(b'abc===', b'i\xb7')
- assertExcessData(b'abc====', b'i\xb7')
- assertExcessData(b'abc=====', b'i\xb7')
+ assertExcessPadding(b'ab===', b'i')
+ assertExcessPadding(b'ab====', b'i')
+ assertNonBase64Data(b'ab==:', b'i')
+ assertExcessData(b'abc=a', b'i\xb7\x1a')
+ assertNonBase64Data(b'abc=:', b'i\xb7')
+ assertNonBase64Data(b'ab==\n', b'i')
+ assertExcessPadding(b'abc==', b'i\xb7')
+ assertExcessPadding(b'abc===', b'i\xb7')
+ assertExcessPadding(b'abc====', b'i\xb7')
+ assertExcessPadding(b'abc=====', b'i\xb7')
# Test non-base64 data exceptions
assertNonBase64Data(b'\nab==', b'i')
@@ -175,6 +174,20 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+ def test_base64_excess_data(self):
+ # Test excess data exceptions
+ def assertExcessData(data, expected):
+ assert_regex = r'(?i)Excess data'
+ data = self.type2test(data)
+ with self.assertRaisesRegex(binascii.Error, assert_regex):
+ binascii.a2b_base64(data, strict_mode=True)
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
+ expected)
+ self.assertEqual(binascii.a2b_base64(data), expected)
+
+ assertExcessData(b'ab==c=', b'i\xb7')
+ assertExcessData(b'ab==cd', b'i\xb7\x1d')
+ assertExcessData(b'abc=d', b'i\xb7\x1d')
def test_base64errors(self):
# Test base64 with invalid padding
diff --git a/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
new file mode 100644
index 00000000000000..22d53fe8db1123
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-02-26-20-13-16.gh-issue-145264.4pggX_.rst
@@ -0,0 +1,4 @@
+Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc) no
+longer ignores excess data after the first padded quad in non-strict
+(default) mode. Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
+the pad character, "=", if it is present before the end of the encoded data.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 6bb01d148b6faa..1030eb15f4169c 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -383,7 +383,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
const unsigned char *ascii_data = data->buf;
size_t ascii_len = data->len;
binascii_state *state = NULL;
- char padding_started = 0;
/* Allocate the buffer */
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -394,14 +393,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
return NULL;
unsigned char *bin_data_start = bin_data;
- if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, "Leading padding not allowed");
- }
- goto error_end;
- }
-
int quad_pos = 0;
unsigned char leftchar = 0;
int pads = 0;
@@ -412,35 +403,34 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
** the invalid ones.
*/
if (this_ch == BASE64_PAD) {
- padding_started = 1;
-
- if (strict_mode && quad_pos == 0) {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, "Excess padding not allowed");
- }
- goto error_end;
+ pads++;
+ if (quad_pos >= 2 && quad_pos + pads <= 4) {
+ continue;
}
- if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
- /* A pad sequence means we should not parse more input.
- ** We've already interpreted the data from the quad at this point.
- ** in strict mode, an error should raise if there's excess data after the padding.
- */
- if (strict_mode && i + 1 < ascii_len) {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, "Excess data after padding");
- }
- goto error_end;
- }
-
- goto done;
+ // See RFC 4648, section-3.3: "specifications MAY ignore the
+ // pad character, "=", treating it as non-alphabet data, if
+ // it is present before the end of the encoded data" and
+ // "the excess pad characters MAY also be ignored."
+ if (!strict_mode) {
+ continue;
}
- continue;
+ if (quad_pos == 1) {
+ /* Set an error below. */
+ break;
+ }
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error,
+ (quad_pos == 0 && i == 0)
+ ? "Leading padding not allowed"
+ : "Excess padding not allowed");
+ }
+ goto error_end;
}
this_ch = table_a2b_base64[this_ch];
if (this_ch >= 64) {
+ // See RFC 4648, section-3.3.
if (strict_mode) {
state = get_binascii_state(module);
if (state) {
@@ -451,11 +441,14 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
continue;
}
- // Characters that are not '=', in the middle of the padding, are not allowed
- if (strict_mode && padding_started) {
+ // Characters that are not '=', in the middle of the padding, are
+ // not allowed (except when they are). See RFC 4648, section-3.3.
+ if (pads && strict_mode) {
state = get_binascii_state(module);
if (state) {
- PyErr_SetString(state->Error, "Discontinuous padding not allowed");
+ PyErr_SetString(state->Error, (quad_pos + pads == 4)
+ ? "Excess data after padding"
+ : "Discontinuous padding not allowed");
}
goto error_end;
}
@@ -484,31 +477,35 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
}
}
- if (quad_pos != 0) {
+ if (quad_pos == 1) {
+ /* There is exactly one extra valid, non-padding, base64 character.
+ ** This is an invalid length, as there is no possible input that
+ ** could encoded into such a base64 string.
+ */
state = get_binascii_state(module);
- if (state == NULL) {
- /* error already set, from get_binascii_state */
- } else if (quad_pos == 1) {
- /*
- ** There is exactly one extra valid, non-padding, base64 character.
- ** This is an invalid length, as there is no possible input that
- ** could encoded into such a base64 string.
- */
+ if (state) {
PyErr_Format(state->Error,
"Invalid base64-encoded string: "
"number of data characters (%zd) cannot be 1 more "
"than a multiple of 4",
(bin_data - bin_data_start) / 3 * 4 + 1);
- } else {
+ }
+ goto error_end;
+ }
+
+ if (quad_pos != 0 && quad_pos + pads < 4) {
+ state = get_binascii_state(module);
+ if (state) {
PyErr_SetString(state->Error, "Incorrect padding");
}
- error_end:
- _PyBytesWriter_Dealloc(&writer);
- return NULL;
+ goto error_end;
}
-done:
return _PyBytesWriter_Finish(&writer, bin_data);
+
+error_end:
+ _PyBytesWriter_Dealloc(&writer);
+ return NULL;
}
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]