https://github.com/python/cpython/commit/4644fed8190e4646663605f3e824f0767a0d026d
commit: 4644fed8190e4646663605f3e824f0767a0d026d
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-02-05T21:14:49+02:00
summary:
gh-144001: Support ignoring the invalid pad character in Base64 decoding
(GH-144306)
files:
M Doc/library/base64.rst
M Doc/library/binascii.rst
M Lib/test/test_base64.py
M Lib/test/test_binascii.py
M Modules/binascii.c
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 478686bc30035c..554d6e7d04ded2 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -87,6 +87,9 @@ POST request.
If *ignorechars* is specified, it should be a :term:`bytes-like object`
containing characters to ignore from the input when *validate* is true.
+ If *ignorechars* contains the pad character ``'='``, pad characters that
+ appear before the end of the encoded data, as well as excess pad characters,
+ will be ignored.
The default value of *validate* is ``True`` if *ignorechars* is specified,
``False`` otherwise.
diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst
index d9f0baedec85f2..eb801175ee6179 100644
--- a/Doc/library/binascii.rst
+++ b/Doc/library/binascii.rst
@@ -56,6 +56,9 @@ The :mod:`binascii` module defines the following functions:
If *ignorechars* is specified, it should be a :term:`bytes-like object`
containing characters to ignore from the input when *strict_mode* is true.
+ If *ignorechars* contains the pad character ``'='``, pad characters that
+ appear before the end of the encoded data, as well as excess pad characters,
+ will be ignored.
The default value of *strict_mode* is ``True`` if *ignorechars* is
specified,
``False`` otherwise.
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 0f947409f0694b..fef18a1b757c08 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -306,7 +306,7 @@ def test_b64decode_invalid_chars(self):
# issue 1466065: Test some invalid characters.
tests = ((b'%3d==', b'\xdd', b'%$'),
(b'$3d==', b'\xdd', b'%$'),
- (b'[==', b'', None),
+ (b'[==', b'', b'[='),
(b'YW]3=', b'am', b']'),
(b'3{d==', b'\xdd', b'{}'),
(b'3d}==', b'\xdd', b'{}'),
@@ -314,6 +314,12 @@ def test_b64decode_invalid_chars(self):
(b'!', b'', b'@!'),
(b"YWJj\n", b"abc", b'\n'),
(b'YWJj\nYWI=', b'abcab', b'\n'),
+ (b'=YWJj', b'abc', b'='),
+ (b'Y=WJj', b'abc', b'='),
+ (b'Y==WJj', b'abc', b'='),
+ (b'Y===WJj', b'abc', b'='),
+ (b'YW=Jj', b'abc', b'='),
+ (b'YWJj=', b'abc', b'='),
(b'YW\nJj', b'abc', b'\n'),
(b'YW\nJj', b'abc', bytearray(b'\n')),
(b'YW\nJj', b'abc', memoryview(b'\n')),
@@ -335,9 +341,8 @@ def test_b64decode_invalid_chars(self):
with self.assertRaises(binascii.Error):
# Even empty ignorechars enables the strict mode.
base64.b64decode(bstr, ignorechars=b'')
- if ignorechars is not None:
- r = base64.b64decode(bstr, ignorechars=ignorechars)
- self.assertEqual(r, res)
+ r = base64.b64decode(bstr, ignorechars=ignorechars)
+ self.assertEqual(r, res)
with self.assertRaises(TypeError):
base64.b64decode(b'', ignorechars='')
diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 49accb08b62e40..a4928794e0acfb 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -118,66 +118,78 @@ def addnoise(line):
# empty strings. TBD: shouldn't it raise an exception instead ?
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'')
- def test_base64_strict_mode(self):
- # Test base64 with strict mode on
- def _assertRegexTemplate(assert_regex: str, data: bytes,
non_strict_mode_expected_result: bytes):
+ def test_base64_bad_padding(self):
+ # Test malformed padding
+ def _assertRegexTemplate(assert_regex, data,
+ non_strict_mode_expected_result):
+ data = self.type2test(data)
with self.assertRaisesRegex(binascii.Error, assert_regex):
- binascii.a2b_base64(self.type2test(data), strict_mode=True)
- self.assertEqual(binascii.a2b_base64(self.type2test(data),
strict_mode=False),
+ binascii.a2b_base64(data, strict_mode=True)
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
non_strict_mode_expected_result)
- self.assertEqual(binascii.a2b_base64(self.type2test(data)),
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+ ignorechars=b'='),
+ non_strict_mode_expected_result)
+ self.assertEqual(binascii.a2b_base64(data),
non_strict_mode_expected_result)
- def assertExcessData(data, non_strict_mode_expected_result: bytes):
- _assertRegexTemplate(r'(?i)Excess data', data,
non_strict_mode_expected_result)
-
- def assertNonBase64Data(data, non_strict_mode_expected_result: bytes):
- _assertRegexTemplate(r'(?i)Only base64 data', data,
non_strict_mode_expected_result)
+ def assertLeadingPadding(*args):
+ _assertRegexTemplate(r'(?i)Leading padding', *args)
- def assertLeadingPadding(data, non_strict_mode_expected_result: bytes):
- _assertRegexTemplate(r'(?i)Leading padding', data,
non_strict_mode_expected_result)
+ def assertDiscontinuousPadding(*args):
+ _assertRegexTemplate(r'(?i)Discontinuous padding', *args)
- def assertDiscontinuousPadding(data, non_strict_mode_expected_result:
bytes):
- _assertRegexTemplate(r'(?i)Discontinuous padding', data,
non_strict_mode_expected_result)
+ def assertExcessPadding(*args):
+ _assertRegexTemplate(r'(?i)Excess padding', *args)
- def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
- _assertRegexTemplate(r'(?i)Excess padding', data,
non_strict_mode_expected_result)
+ def assertInvalidLength(*args):
+ _assertRegexTemplate(r'(?i)Invalid.+number of data characters',
*args)
- # Test excess data exceptions
- assertExcessData(b'ab==a', b'i')
assertExcessPadding(b'ab===', b'i')
assertExcessPadding(b'ab====', b'i')
- assertNonBase64Data(b'ab==:', b'i')
- assertExcessData(b'abc=a', b'i\xb7')
- assertNonBase64Data(b'abc=:', b'i\xb7')
- assertNonBase64Data(b'ab==\n', b'i')
assertExcessPadding(b'abc==', b'i\xb7')
assertExcessPadding(b'abc===', b'i\xb7')
assertExcessPadding(b'abc====', b'i\xb7')
assertExcessPadding(b'abc=====', b'i\xb7')
- # Test non-base64 data exceptions
- assertNonBase64Data(b'\nab==', b'i')
- assertNonBase64Data(b'ab:(){:|:&};:==', b'i')
- assertNonBase64Data(b'a\nb==', b'i')
- assertNonBase64Data(b'a\x00b==', b'i')
-
- # Test malformed padding
assertLeadingPadding(b'=', b'')
assertLeadingPadding(b'==', b'')
assertLeadingPadding(b'===', b'')
assertLeadingPadding(b'====', b'')
assertLeadingPadding(b'=====', b'')
+ assertLeadingPadding(b'=abcd', b'i\xb7\x1d')
+ assertLeadingPadding(b'==abcd', b'i\xb7\x1d')
+ assertLeadingPadding(b'===abcd', b'i\xb7\x1d')
+ assertLeadingPadding(b'====abcd', b'i\xb7\x1d')
+ assertLeadingPadding(b'=====abcd', b'i\xb7\x1d')
+
+ assertInvalidLength(b'a=b==', b'i')
+ assertInvalidLength(b'a=bc=', b'i\xb7')
+ assertInvalidLength(b'a=bc==', b'i\xb7')
+ assertInvalidLength(b'a=bcd', b'i\xb7\x1d')
+ assertInvalidLength(b'a=bcd=', b'i\xb7\x1d')
+
assertDiscontinuousPadding(b'ab=c=', b'i\xb7')
- assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b')
- assertNonBase64Data(b'ab=:=', b'i')
+ assertDiscontinuousPadding(b'ab=cd', b'i\xb7\x1d')
+ assertDiscontinuousPadding(b'ab=cd==', b'i\xb7\x1d')
+
assertExcessPadding(b'abcd=', b'i\xb7\x1d')
assertExcessPadding(b'abcd==', b'i\xb7\x1d')
assertExcessPadding(b'abcd===', b'i\xb7\x1d')
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+ assertExcessPadding(b'abcd==', b'i\xb7\x1d')
+ assertExcessPadding(b'abcd===', b'i\xb7\x1d')
+ assertExcessPadding(b'abcd====', b'i\xb7\x1d')
+ assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
+ assertExcessPadding(b'abcd=efgh', b'i\xb7\x1dy\xf8!')
+ assertExcessPadding(b'abcd==efgh', b'i\xb7\x1dy\xf8!')
+ assertExcessPadding(b'abcd===efgh', b'i\xb7\x1dy\xf8!')
+ assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!')
+ assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!')
def test_base64_invalidchars(self):
+ # Test non-base64 data exceptions
def assertNonBase64Data(data, expected, ignorechars):
data = self.type2test(data)
assert_regex = r'(?i)Only base64 data'
@@ -195,10 +207,11 @@ def assertNonBase64Data(data, expected, ignorechars):
assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&')
assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n')
assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00')
+ assertNonBase64Data(b'ab:==', b'i', ignorechars=b':')
+ assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
assertNonBase64Data(b'ab==:', b'i', ignorechars=b':')
assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':')
assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n')
- assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':')
assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n'))
assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n'))
@@ -221,11 +234,37 @@ def assertNonBase64Data(data, expected, ignorechars):
with self.assertRaises(TypeError):
binascii.a2b_base64(data, ignorechars=None)
+ def test_base64_excess_data(self):
+ # Test excess data exceptions
+ def assertExcessData(data, non_strict_expected,
+ ignore_padchar_expected=None):
+ assert_regex = r'(?i)Excess data'
+ data = self.type2test(data)
+ with self.assertRaisesRegex(binascii.Error, assert_regex):
+ binascii.a2b_base64(data, strict_mode=True)
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
+ non_strict_expected)
+ if ignore_padchar_expected is not None:
+ self.assertEqual(binascii.a2b_base64(data, strict_mode=True,
+ ignorechars=b'='),
+ ignore_padchar_expected)
+ self.assertEqual(binascii.a2b_base64(data), non_strict_expected)
+
+ assertExcessData(b'ab==c', b'i')
+ assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d')
+ assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d')
+
def test_base64errors(self):
# Test base64 with invalid padding
- def assertIncorrectPadding(data):
+ def assertIncorrectPadding(data, strict_mode=True):
+ data = self.type2test(data)
with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect
padding'):
- binascii.a2b_base64(self.type2test(data))
+ binascii.a2b_base64(data)
+ with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect
padding'):
+ binascii.a2b_base64(data, strict_mode=False)
+ if strict_mode:
+ with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect
padding'):
+ binascii.a2b_base64(data, strict_mode=True)
assertIncorrectPadding(b'ab')
assertIncorrectPadding(b'ab=')
@@ -233,16 +272,22 @@ def assertIncorrectPadding(data):
assertIncorrectPadding(b'abcdef')
assertIncorrectPadding(b'abcdef=')
assertIncorrectPadding(b'abcdefg')
- assertIncorrectPadding(b'a=b=')
- assertIncorrectPadding(b'a\nb=')
+ assertIncorrectPadding(b'a=b=', strict_mode=False)
+ assertIncorrectPadding(b'a\nb=', strict_mode=False)
# Test base64 with invalid number of valid characters (1 mod 4)
- def assertInvalidLength(data):
+ def assertInvalidLength(data, strict_mode=True):
n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', br'', data))
+ data = self.type2test(data)
expected_errmsg_re = \
r'(?i)Invalid.+number of data characters.+' + str(n_data_chars)
with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
- binascii.a2b_base64(self.type2test(data))
+ binascii.a2b_base64(data)
+ with self.assertRaisesRegex(binascii.Error, expected_errmsg_re):
+ binascii.a2b_base64(data, strict_mode=False)
+ if strict_mode:
+ with self.assertRaisesRegex(binascii.Error,
expected_errmsg_re):
+ binascii.a2b_base64(data, strict_mode=True)
assertInvalidLength(b'a')
assertInvalidLength(b'a=')
@@ -250,7 +295,8 @@ def assertInvalidLength(data):
assertInvalidLength(b'a===')
assertInvalidLength(b'a' * 5)
assertInvalidLength(b'a' * (4 * 87 + 1))
- assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters
+ assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters
+ strict_mode=False)
def test_uu(self):
MAX_UU = 45
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 201e7798bb7a8c..6d3d4e1a6d6daa 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -564,26 +564,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer
*data, int strict_mode,
pads++;
if (strict_mode) {
- if (quad_pos == 0) {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, (ascii_data == data->buf)
- ? "Leading padding not allowed"
- : "Excess padding not allowed");
- }
- goto error_end;
+ if (quad_pos >= 2 && quad_pos + pads <= 4) {
+ continue;
+ }
+ if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) {
+ continue;
}
if (quad_pos == 1) {
/* Set an error below. */
break;
}
- if (quad_pos + pads > 4) {
- state = get_binascii_state(module);
- if (state) {
- PyErr_SetString(state->Error, "Excess padding not
allowed");
- }
- goto error_end;
+ state = get_binascii_state(module);
+ if (state) {
+ PyErr_SetString(state->Error,
+ (quad_pos == 0 && ascii_data == data->buf)
+ ? "Leading padding not allowed"
+ : "Excess padding not allowed");
}
+ goto error_end;
}
else {
if (quad_pos >= 2 && quad_pos + pads >= 4) {
@@ -592,8 +590,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data,
int strict_mode,
*/
goto done;
}
+ continue;
}
- continue;
}
unsigned char v = table_a2b_base64[this_ch];
@@ -609,7 +607,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data,
int strict_mode,
}
// Characters that are not '=', in the middle of the padding, are not
allowed
- if (strict_mode && pads) {
+ if (pads && strict_mode &&
+ !ignorechar(BASE64_PAD, ignorechars, ignorecache))
+ {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, (quad_pos + pads == 4)
@@ -662,7 +662,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data,
int strict_mode,
goto error_end;
}
- if (quad_pos != 0 && quad_pos + pads != 4) {
+ if (quad_pos != 0 && quad_pos + pads < 4) {
state = get_binascii_state(module);
if (state) {
PyErr_SetString(state->Error, "Incorrect padding");
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]