https://github.com/python/cpython/commit/5a8b19677d818fb41ee55f310233772e15aa1a2b
commit: 5a8b19677d818fb41ee55f310233772e15aa1a2b
branch: 3.12
author: Serhiy Storchaka <[email protected]>
committer: Yhg1s <[email protected]>
date: 2025-12-22T14:49:44+01:00
summary:
[3.12] gh-119342: Fix a potential denial of service in plistlib (GH-119343)
(#142149)
Reading a specially prepared small Plist file could cause OOM because file's
read(n) preallocates a bytes object for reading the specified amount of
data. Now plistlib reads large data by chunks, therefore the upper limit of
consumed memory is proportional to the size of the input file.
(cherry picked from commit 694922cf40aa3a28f898b5f5ee08b71b4922df70)
files:
A Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst
M Lib/plistlib.py
M Lib/test/test_plistlib.py
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index 3292c30d5fb29b..c5554ea1f753b0 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -73,6 +73,9 @@
PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
globals().update(PlistFormat.__members__)
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = 1 << 20
class UID:
def __init__(self, data):
@@ -499,12 +502,24 @@ def _get_size(self, tokenL):
return tokenL
+ def _read(self, size):
+ cursize = min(size, _MIN_READ_BUF_SIZE)
+ data = self._fp.read(cursize)
+ while True:
+ if len(data) != cursize:
+ raise InvalidFileException
+ if cursize == size:
+ return data
+ delta = min(cursize, size - cursize)
+ data += self._fp.read(delta)
+ cursize += delta
+
def _read_ints(self, n, size):
- data = self._fp.read(size * n)
+ data = self._read(size * n)
if size in _BINARY_FORMAT:
return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
else:
- if not size or len(data) != size * n:
+ if not size:
raise InvalidFileException()
return tuple(int.from_bytes(data[i: i + size], 'big')
for i in range(0, size * n, size))
@@ -561,22 +576,16 @@ def _read_object(self, ref):
elif tokenH == 0x40: # data
s = self._get_size(tokenL)
- result = self._fp.read(s)
- if len(result) != s:
- raise InvalidFileException()
+ result = self._read(s)
elif tokenH == 0x50: # ascii string
s = self._get_size(tokenL)
- data = self._fp.read(s)
- if len(data) != s:
- raise InvalidFileException()
+ data = self._read(s)
result = data.decode('ascii')
elif tokenH == 0x60: # unicode string
s = self._get_size(tokenL) * 2
- data = self._fp.read(s)
- if len(data) != s:
- raise InvalidFileException()
+ data = self._read(s)
result = data.decode('utf-16be')
elif tokenH == 0x80: # UID
diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py
index fa46050658afe0..229a5a242eced3 100644
--- a/Lib/test/test_plistlib.py
+++ b/Lib/test/test_plistlib.py
@@ -841,8 +841,7 @@ def test_xml_plist_with_entity_decl(self):
class TestBinaryPlistlib(unittest.TestCase):
- @staticmethod
- def decode(*objects, offset_size=1, ref_size=1):
+ def build(self, *objects, offset_size=1, ref_size=1):
data = [b'bplist00']
offset = 8
offsets = []
@@ -854,7 +853,11 @@ def decode(*objects, offset_size=1, ref_size=1):
len(objects), 0, offset)
data.extend(offsets)
data.append(tail)
- return plistlib.loads(b''.join(data), fmt=plistlib.FMT_BINARY)
+ return b''.join(data)
+
+ def decode(self, *objects, offset_size=1, ref_size=1):
+ data = self.build(*objects, offset_size=offset_size, ref_size=ref_size)
+ return plistlib.loads(data, fmt=plistlib.FMT_BINARY)
def test_nonstandard_refs_size(self):
# Issue #21538: Refs and offsets are 24-bit integers
@@ -963,6 +966,34 @@ def test_invalid_binary(self):
with self.assertRaises(plistlib.InvalidFileException):
plistlib.loads(b'bplist00' + data, fmt=plistlib.FMT_BINARY)
+ def test_truncated_large_data(self):
+ self.addCleanup(os_helper.unlink, os_helper.TESTFN)
+ def check(data):
+ with open(os_helper.TESTFN, 'wb') as f:
+ f.write(data)
+ # buffered file
+ with open(os_helper.TESTFN, 'rb') as f:
+ with self.assertRaises(plistlib.InvalidFileException):
+ plistlib.load(f, fmt=plistlib.FMT_BINARY)
+ # unbuffered file
+ with open(os_helper.TESTFN, 'rb', buffering=0) as f:
+ with self.assertRaises(plistlib.InvalidFileException):
+ plistlib.load(f, fmt=plistlib.FMT_BINARY)
+ for w in range(20, 64):
+ s = 1 << w
+ # data
+ check(self.build(b'\x4f\x13' + s.to_bytes(8, 'big')))
+ # ascii string
+ check(self.build(b'\x5f\x13' + s.to_bytes(8, 'big')))
+ # unicode string
+ check(self.build(b'\x6f\x13' + s.to_bytes(8, 'big')))
+ # array
+ check(self.build(b'\xaf\x13' + s.to_bytes(8, 'big')))
+ # dict
+ check(self.build(b'\xdf\x13' + s.to_bytes(8, 'big')))
+ # number of objects
+ check(b'bplist00' + struct.pack('>6xBBQQQ', 1, 1, s, 0, 8))
+
class TestKeyedArchive(unittest.TestCase):
def test_keyed_archive_data(self):
diff --git
a/Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst
b/Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst
new file mode 100644
index 00000000000000..04fd8faca4cf7e
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst
@@ -0,0 +1,5 @@
+Fix a potential memory denial of service in the :mod:`plistlib` module.
+When reading a Plist file received from untrusted source, it could cause
+an arbitrary amount of memory to be allocated.
+This could have led to symptoms including a :exc:`MemoryError`, swapping, out
+of memory (OOM) killed processes or containers, or even system crashes.
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]