https://github.com/python/cpython/commit/c266f0c375c2e60ea46046254fa7cd5fa2fe1ca2
commit: c266f0c375c2e60ea46046254fa7cd5fa2fe1ca2
branch: main
author: Maurycy Pawłowski-Wieroński <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-05-05T00:50:06Z
summary:
gh-149009: Validate `thread_count` in `profiling.sampling` binary reader (#149147)
files:
A Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst
M Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
M Modules/_remote_debugging/binary_io_reader.c
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index ca6cb6befaed24..9cf706aa2dafee 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -3,6 +3,7 @@
import json
import os
import random
+import struct
import tempfile
import unittest
from collections import defaultdict
@@ -941,6 +942,35 @@ def test_writer_total_samples_after_close_returns_zero(self):
self.assertEqual(w.total_samples, 0)
+class TestBinaryFormatValidation(BinaryFormatTestBase):
+ """Tests for malformed binary files."""
+
+ HDR_OFF_THREADS = 32
+
+ def test_replay_rejects_more_threads_than_declared(self):
+ """Replay rejects files with more unique threads than the header declares."""
+ threads = [
+ make_thread(1, [make_frame("t1.py", 10, "t1")]),
+ make_thread(2, [make_frame("t2.py", 20, "t2")]),
+ ]
+ samples = [[make_interpreter(0, threads)]]
+ filename = self.create_binary_file(samples, compression="none")
+
+ with open(filename, "r+b") as raw:
+ raw.seek(self.HDR_OFF_THREADS)
+ raw.write(struct.pack("=I", 1))
+
+ with BinaryReader(filename) as reader:
+ self.assertEqual(reader.get_info()["thread_count"], 1)
+ with self.assertRaises(ValueError) as cm:
+ reader.replay_samples(RawCollector())
+ self.assertEqual(
+ str(cm.exception),
+ "Invalid thread count: sample data contains more unique "
+ "threads than declared in header (declared 1, found at least 2)",
+ )
+
+
class TestBinaryEncodings(BinaryFormatTestBase):
"""Tests specifically targeting different stack encodings."""
diff --git a/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst
new file mode 100644
index 00000000000000..e2f078742760a5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst
@@ -0,0 +1,3 @@
+Validate that :mod:`profiling.sampling` binary profiles do not contain more
+unique (thread, interpreter) pairs than declared in the header. Patch by
+Maurycy Pawłowski-Wieroński.
diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c
index da3e7d55309c27..551530b519952c 100644
--- a/Modules/_remote_debugging/binary_io_reader.c
+++ b/Modules/_remote_debugging/binary_io_reader.c
@@ -559,6 +559,14 @@ reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id,
}
}
+ if (reader->thread_state_count >= reader->thread_count) {
+ PyErr_Format(PyExc_ValueError,
+ "Invalid thread count: sample data contains more unique threads than declared in header "
+ "(declared %u, found at least %zu)",
+ reader->thread_count, reader->thread_state_count + 1);
+ return NULL;
+ }
+
if (!reader->thread_states) {
reader->thread_state_capacity = 16;
reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState));
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]