https://github.com/python/cpython/commit/04ce31852260b3d39e35286c1b6a134a3c475b22
commit: 04ce31852260b3d39e35286c1b6a134a3c475b22
branch: main
author: Maurycy Pawłowski-Wieroński <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-05-05T00:44:37Z
summary:
gh-146256: Add `--jsonl` collector to the `profiling.sampling` (#146257)
files:
A Lib/profiling/sampling/jsonl_collector.py
A Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
M Lib/profiling/sampling/__init__.py
M Lib/profiling/sampling/binary_reader.py
M Lib/profiling/sampling/cli.py
M Lib/profiling/sampling/collector.py
M Lib/profiling/sampling/constants.py
M Lib/test/test_profiling/test_sampling_profiler/helpers.py
M Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
M Lib/test/test_profiling/test_sampling_profiler/test_cli.py
M Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
M Modules/_remote_debugging/binary_io_reader.c
diff --git a/Lib/profiling/sampling/__init__.py
b/Lib/profiling/sampling/__init__.py
index 6a0bb5e5c2f387..71579a3903253e 100644
--- a/Lib/profiling/sampling/__init__.py
+++ b/Lib/profiling/sampling/__init__.py
@@ -9,6 +9,15 @@
from .stack_collector import CollapsedStackCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
+from .jsonl_collector import JsonlCollector
from .string_table import StringTable
-__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector",
"HeatmapCollector", "GeckoCollector", "StringTable")
+__all__ = (
+ "Collector",
+ "PstatsCollector",
+ "CollapsedStackCollector",
+ "HeatmapCollector",
+ "GeckoCollector",
+ "JsonlCollector",
+ "StringTable",
+)
diff --git a/Lib/profiling/sampling/binary_reader.py
b/Lib/profiling/sampling/binary_reader.py
index a11be3652597a6..a29dad91ae339d 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -4,6 +4,7 @@
from .gecko_collector import GeckoCollector
from .stack_collector import FlamegraphCollector, CollapsedStackCollector
+from .jsonl_collector import JsonlCollector
from .pstats_collector import PstatsCollector
@@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file,
output_format,
collector = PstatsCollector(interval)
elif output_format == 'gecko':
collector = GeckoCollector(interval)
+ elif output_format == "jsonl":
+ collector = JsonlCollector(interval)
else:
raise ValueError(f"Unknown output format: {output_format}")
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index 9900415ae8a927..0648713edc52af 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -20,6 +20,7 @@
from .stack_collector import CollapsedStackCollector, FlamegraphCollector,
DiffFlamegraphCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
+from .jsonl_collector import JsonlCollector
from .binary_collector import BinaryCollector
from .binary_reader import BinaryReader
from .constants import (
@@ -101,6 +102,7 @@ def __call__(self, parser, namespace, values,
option_string=None):
"diff_flamegraph": "html",
"gecko": "json",
"heatmap": "html",
+ "jsonl": "jsonl",
"binary": "bin",
}
@@ -111,6 +113,7 @@ def __call__(self, parser, namespace, values,
option_string=None):
"diff_flamegraph": DiffFlamegraphCollector,
"gecko": GeckoCollector,
"heatmap": HeatmapCollector,
+ "jsonl": JsonlCollector,
"binary": BinaryCollector,
}
@@ -488,6 +491,13 @@ def _add_format_options(parser, include_compression=True,
include_binary=True):
action=DiffFlamegraphAction,
help="Generate differential flamegraph comparing current profile to
`BASELINE` binary file",
)
+ format_group.add_argument(
+ "--jsonl",
+ action="store_const",
+ const="jsonl",
+ dest="format",
+ help="Generate newline-delimited JSON (JSONL) for programmatic
consumers",
+ )
if include_binary:
format_group.add_argument(
"--binary",
@@ -611,15 +621,18 @@ def _sort_to_mode(sort_choice):
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
def _create_collector(format_type, sample_interval_usec, skip_idle,
opcodes=False,
- output_file=None, compression='auto',
diff_baseline=None):
+ mode=None, output_file=None, compression='auto',
+ diff_baseline=None):
"""Create the appropriate collector based on format type.
Args:
- format_type: The output format ('pstats', 'collapsed', 'flamegraph',
'gecko', 'heatmap', 'binary', 'diff_flamegraph')
+ format_type: The output format ('pstats', 'collapsed', 'flamegraph',
+ 'gecko', 'heatmap', 'jsonl', 'binary', 'diff_flamegraph')
sample_interval_usec: Sampling interval in microseconds
skip_idle: Whether to skip idle samples
opcodes: Whether to collect opcode information (only used by gecko
format
for creating interval markers in Firefox Profiler)
+ mode: Profiling mode for collectors that expose it in metadata
output_file: Output file path (required for binary format)
compression: Compression type for binary format ('auto', 'zstd',
'none')
diff_baseline: Path to baseline binary file for differential flamegraph
@@ -655,6 +668,11 @@ def _create_collector(format_type, sample_interval_usec,
skip_idle, opcodes=Fals
skip_idle = False
return collector_class(sample_interval_usec, skip_idle=skip_idle,
opcodes=opcodes)
+ if format_type == "jsonl":
+ return collector_class(
+ sample_interval_usec, skip_idle=skip_idle, mode=mode
+ )
+
return collector_class(sample_interval_usec, skip_idle=skip_idle)
@@ -1142,7 +1160,7 @@ def _handle_attach(args):
# Create the appropriate collector
collector = _create_collector(
- args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+ args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
output_file=output_file,
compression=getattr(args, 'compression', 'auto'),
diff_baseline=args.diff_baseline
@@ -1249,7 +1267,7 @@ def _handle_run(args):
# Create the appropriate collector
collector = _create_collector(
- args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+ args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
output_file=output_file,
compression=getattr(args, 'compression', 'auto'),
diff_baseline=args.diff_baseline
diff --git a/Lib/profiling/sampling/collector.py
b/Lib/profiling/sampling/collector.py
index 08759b611696b7..81ec6344ebdea4 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -20,13 +20,16 @@ def normalize_location(location):
"""Normalize location to a 4-tuple format.
Args:
- location: tuple (lineno, end_lineno, col_offset, end_col_offset) or
None
+ location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+ an integer line number, or None
Returns:
tuple: (lineno, end_lineno, col_offset, end_col_offset)
"""
if location is None:
return DEFAULT_LOCATION
+ if isinstance(location, int):
+ return (location, location, -1, -1)
return location
@@ -34,13 +37,16 @@ def extract_lineno(location):
"""Extract lineno from location.
Args:
- location: tuple (lineno, end_lineno, col_offset, end_col_offset) or
None
+ location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+ an integer line number, or None
Returns:
int: The line number (0 for synthetic frames)
"""
if location is None:
return 0
+ if isinstance(location, int):
+ return location
return location[0]
def _is_internal_frame(frame):
diff --git a/Lib/profiling/sampling/constants.py
b/Lib/profiling/sampling/constants.py
index a364d0b8fde1e0..d7c710f943b1b7 100644
--- a/Lib/profiling/sampling/constants.py
+++ b/Lib/profiling/sampling/constants.py
@@ -11,6 +11,14 @@
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
PROFILING_MODE_EXCEPTION = 4 # Only samples when thread has an active
exception
+PROFILING_MODE_NAMES = {
+ PROFILING_MODE_WALL: "wall",
+ PROFILING_MODE_CPU: "cpu",
+ PROFILING_MODE_GIL: "gil",
+ PROFILING_MODE_ALL: "all",
+ PROFILING_MODE_EXCEPTION: "exception",
+}
+
# Sort mode constants
SORT_MODE_NSAMPLES = 0
SORT_MODE_TOTTIME = 1
diff --git a/Lib/profiling/sampling/jsonl_collector.py
b/Lib/profiling/sampling/jsonl_collector.py
new file mode 100644
index 00000000000000..7d26129b80de86
--- /dev/null
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -0,0 +1,266 @@
+"""JSON Lines (JSONL) collector for the sampling profiler.
+
+Emits a normalized newline-delimited JSON record stream suitable for
+programmatic consumption by external tools, scripts, and agents. Each line
+is one JSON object; consumers can parse the file incrementally line by
+line, but the producer writes the whole file at the end of the run (it is
+not a live/streaming producer).
+
+Record schema
+=============
+
+Every record is a JSON object with at least ``"type"``, ``"v"`` (record
+schema version), and ``"run_id"`` (UUID4 hex tagging the run; allows
+demultiplexing concatenated streams). Records appear in this fixed order:
+
+1. ``meta`` (exactly one, first line)::
+
+ {"type":"meta","v":0,"run_id":"<hex>",
+ "sample_interval_usec":<int>,"mode":"wall|cpu|gil|all|exception"}
+
+ ``mode`` is omitted when not provided.
+
+2. ``string_table`` (zero or more)::
+
+ {"type":"string_table","v":0,"run_id":"<hex>",
+ "strings":[{"str_id":<int>,"value":"<str>"}, ...]}
+
+ Strings (filenames, function names) are interned to keep repeated values
+ compact. IDs are zero-based. Each chunk holds up to ``_CHUNK_SIZE``
+ entries, and each entry carries its explicit ``str_id`` so consumers do
+ not need to infer offsets across chunks.
+
+3. ``frame_table`` (zero or more)::
+
+ {"type":"frame_table","v":0,"run_id":"<hex>",
+ "frames":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
+ "line":<int>,"end_line":<int>,"col":<int>,
+ "end_col":<int>}, ...]}
+
+ ``end_line``/``col``/``end_col`` are *omitted* when source location data
+ is unavailable (a missing key means "not available", not zero or null).
+ ``line`` is ``0`` for synthetic frames (for example, internal marker
+ frames whose source location is None). Frame IDs are zero-based.
+
+4. ``agg`` (zero or more)::
+
+ {"type":"agg","v":0,"run_id":"<hex>","kind":"frame","scope":"final",
+ "samples_total":<int>,
+ "entries":[{"frame_id":<int>,"self":<int>,"cumulative":<int>}, ...]}
+
+ ``self`` counts samples where the frame was the leaf (currently
+ executing); ``cumulative`` counts samples where the frame appeared
+ anywhere in the stack (deduped per sample so recursion does not
+ double-count). ``samples_total`` is the run-wide total, repeated on
+ each chunk so a streaming consumer always knows the denominator.
+
+5. ``end`` (exactly one, last line)::
+
+ {"type":"end","v":0,"run_id":"<hex>","samples_total":<int>}
+
+ Presence of ``end`` is the consumer's signal that the file is complete.
+
+Forward compatibility
+=====================
+
+Consumers MUST ignore unknown record ``"type"`` values and unknown object
+fields. New fields will be added by adding optional keys; an incompatible
+schema change will bump the per-record ``"v"``.
+"""
+
+from collections import Counter
+import json
+import uuid
+from itertools import batched
+
+from .constants import PROFILING_MODE_NAMES
+from .collector import normalize_location
+from .stack_collector import StackTraceCollector
+
+
# Maximum number of entries per string_table/frame_table/agg record chunk;
# keeps individual JSONL lines bounded in size for incremental consumers.
_CHUNK_SIZE = 256
# Per-record schema version (the "v" field); bumped only on an
# incompatible schema change (see module docstring).
_SCHEMA_VERSION = 0
+
+
class JsonlCollector(StackTraceCollector):
    """Collector that exports finalized profiling data as JSONL.

    See the module docstring for the full record schema. The collector
    accumulates samples in memory and writes the complete file at
    ``export()`` time.
    """

    def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
        """Initialize empty interning tables and aggregation counters.

        Args:
            sample_interval_usec: sampling interval, echoed in the ``meta``
                record
            skip_idle: forwarded to the base collector (skip idle threads)
            mode: PROFILING_MODE_* constant or None; named in ``meta``
                when provided
        """
        super().__init__(sample_interval_usec, skip_idle=skip_idle)
        # UUID4 hex tagging every record of this run so consumers can
        # demultiplex concatenated streams.
        self.run_id = uuid.uuid4().hex

        # String interning: value -> str_id, plus the ordered records
        # emitted as string_table chunks.
        self._string_to_id = {}
        self._strings = []

        # Frame interning: dedup key -> frame_id, plus the ordered
        # records emitted as frame_table chunks.
        self._frame_to_id = {}
        self._frames = []

        # Aggregation state: leaf ("self") counts, per-sample-deduped
        # cumulative counts, and the run-wide sample total.
        self._frame_self = Counter()
        self._frame_cumulative = Counter()
        self._samples_total = 0
        # Scratch set reused across process_frames calls to dedupe
        # recursive frames within a single sample.
        self._seen_frame_ids = set()

        self._mode = mode

    def process_frames(self, frames, _thread_id, weight=1):
        """Fold one sampled stack (leaf first) into the aggregates.

        ``weight`` > 1 occurs when replaying RLE-batched binary samples;
        it scales both the self and cumulative counts.
        """
        self._samples_total += weight
        self._seen_frame_ids.clear()

        for i, (filename, location, funcname, _opcode) in enumerate(frames):
            frame_id = self._get_or_create_frame_id(
                filename, location, funcname
            )
            # Frame 0 is the currently-executing (leaf) frame.
            is_leaf = i == 0
            # Count cumulative at most once per sample so recursion does
            # not double-count a frame that appears multiple times.
            count_cumulative = frame_id not in self._seen_frame_ids

            if count_cumulative:
                self._seen_frame_ids.add(frame_id)

            if is_leaf:
                self._frame_self[frame_id] += weight

            if count_cumulative:
                self._frame_cumulative[frame_id] += weight

    def export(self, filename):
        """Write the complete JSONL stream to *filename*.

        Record order is fixed: meta, string_table chunks, frame_table
        chunks, agg chunks, end (see module docstring).
        """
        with open(filename, "w", encoding="utf-8") as output:
            self._write_message(output, self._build_meta_record())
            self._write_chunked_records(
                output,
                {
                    "type": "string_table",
                    "v": _SCHEMA_VERSION,
                    "run_id": self.run_id,
                },
                "strings",
                self._strings,
            )
            self._write_chunked_records(
                output,
                {
                    "type": "frame_table",
                    "v": _SCHEMA_VERSION,
                    "run_id": self.run_id,
                },
                "frames",
                self._frames,
            )
            self._write_chunked_records(
                output,
                {
                    "type": "agg",
                    "v": _SCHEMA_VERSION,
                    "run_id": self.run_id,
                    "kind": "frame",
                    "scope": "final",
                    # Repeated on each chunk so a streaming consumer
                    # always knows the denominator.
                    "samples_total": self._samples_total,
                },
                "entries",
                self._iter_final_agg_entries(),
            )
            self._write_message(output, self._build_end_record())

    def _build_meta_record(self):
        """Return the single ``meta`` record (first line of the file)."""
        record = {
            "type": "meta",
            "v": _SCHEMA_VERSION,
            "run_id": self.run_id,
            "sample_interval_usec": self.sample_interval_usec,
        }

        # "mode" is omitted entirely when not provided; unknown numeric
        # modes fall back to their string form.
        if self._mode is not None:
            record["mode"] = PROFILING_MODE_NAMES.get(
                self._mode, str(self._mode)
            )

        return record

    def _build_end_record(self):
        """Return the single ``end`` record (last line; completion marker)."""
        record = {
            "type": "end",
            "v": _SCHEMA_VERSION,
            "run_id": self.run_id,
            "samples_total": self._samples_total,
        }

        return record

    def _iter_final_agg_entries(self):
        """Yield one agg entry per interned frame, in frame_id order."""
        for frame_record in self._frames:
            frame_id = frame_record["frame_id"]
            yield {
                "frame_id": frame_id,
                "self": self._frame_self[frame_id],
                "cumulative": self._frame_cumulative[frame_id],
            }

    def _get_or_create_frame_id(self, filename, location, funcname):
        """Intern a frame and return its zero-based frame_id."""
        location_fields = self._location_to_export_fields(location)
        # Interning order (funcname before filename) determines str_id
        # assignment and is part of the observable output.
        func_str_id = self._intern_string(funcname)
        path_str_id = self._intern_string(filename)

        # Dedup key includes optional location fields (None when omitted)
        # so frames differing only in available location data stay distinct.
        frame_key = (
            path_str_id,
            func_str_id,
            location_fields["line"],
            location_fields.get("end_line"),
            location_fields.get("col"),
            location_fields.get("end_col"),
        )

        if (frame_id := self._frame_to_id.get(frame_key)) is not None:
            return frame_id

        frame_id = len(self._frames)
        frame_record = {
            "frame_id": frame_id,
            "path_str_id": path_str_id,
            "func_str_id": func_str_id,
            **location_fields,
        }

        self._frame_to_id[frame_key] = frame_id
        self._frames.append(frame_record)
        return frame_id

    def _intern_string(self, value):
        """Intern *value* and return its zero-based str_id."""
        value = str(value)

        if (string_id := self._string_to_id.get(value)) is not None:
            return string_id

        string_id = len(self._strings)
        self._string_to_id[value] = string_id
        self._strings.append({"str_id": string_id, "value": value})
        return string_id

    @staticmethod
    def _location_to_export_fields(location):
        """Map a location to the frame-record fields, omitting unknowns.

        A missing key means "not available" (see module docstring), so
        end_line/col/end_col are only emitted when their normalized
        values indicate real data.
        """
        lineno, end_lineno, col_offset, end_col_offset = normalize_location(
            location
        )

        fields = {"line": lineno}
        if end_lineno > 0:
            fields["end_line"] = end_lineno
        if col_offset >= 0:
            fields["col"] = col_offset
        if end_col_offset >= 0:
            fields["end_col"] = end_col_offset
        return fields

    def _write_chunked_records(
        self, output, base_record, chunk_field, entries
    ):
        """Write *entries* as records of at most _CHUNK_SIZE items each."""
        for chunk in batched(entries, _CHUNK_SIZE):
            self._write_message(output, {**base_record, chunk_field: chunk})

    @staticmethod
    def _write_message(output, record):
        """Serialize one record as a compact JSON line."""
        # Compact separators keep the stream small; insertion order of
        # the dict is preserved in the output.
        output.write(json.dumps(record, separators=(",", ":")))
        output.write("\n")
diff --git a/Lib/test/test_profiling/test_sampling_profiler/helpers.py
b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
index 0e32d8dd9eabef..b07776d415bb29 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/helpers.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
@@ -174,3 +174,29 @@ def close_and_unlink(file):
"""Close a file and unlink it from the filesystem."""
file.close()
unlink(file.name)
+
+
def jsonl_tables(records):
    """Split a parsed JSONL profile into its canonical sections.

    Returns ``(meta, str_defs, frame_defs, agg, end)``:

    * ``meta``/``end``/``agg`` are the first record of each type
      (raises ``StopIteration`` when one is absent);
    * ``str_defs`` maps ``str_id`` to its string value across all
      ``string_table`` chunks;
    * ``frame_defs`` is the flat list of frame definitions across all
      ``frame_table`` chunks.

    Using only the first agg record is sufficient for tests that emit a
    single chunk.
    """
    def first_record(kind):
        return next(rec for rec in records if rec["type"] == kind)

    meta = first_record("meta")
    end = first_record("end")
    agg = first_record("agg")

    str_defs = {}
    frame_defs = []
    for rec in records:
        if rec["type"] == "string_table":
            for entry in rec["strings"]:
                str_defs[entry["str_id"]] = entry["value"]
        elif rec["type"] == "frame_table":
            frame_defs.extend(rec["frames"])

    return meta, str_defs, frame_defs, agg, end
diff --git
a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index 7e6cb724c407e3..ca6cb6befaed24 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -1,5 +1,6 @@
"""Tests for binary format round-trip functionality."""
+import json
import os
import random
import tempfile
@@ -21,7 +22,7 @@
THREAD_STATUS_MAIN_THREAD,
)
from profiling.sampling.binary_collector import BinaryCollector
- from profiling.sampling.binary_reader import BinaryReader
+ from profiling.sampling.binary_reader import BinaryReader,
convert_binary_to_format
from profiling.sampling.gecko_collector import GeckoCollector
ZSTD_AVAILABLE = _remote_debugging.zstd_available()
@@ -30,6 +31,8 @@
"Test only runs when _remote_debugging is available"
)
+from .helpers import jsonl_tables
+
def make_frame(filename, lineno, funcname, end_lineno=None, column=None,
end_column=None, opcode=None):
@@ -1343,5 +1346,70 @@ def test_timestamp_preservation_with_rle(self):
self.assertEqual(ts_collector.all_timestamps, expected_timestamps)
class TestBinaryReplayToJsonl(BinaryFormatTestBase):
    """Tests for binary -> JSONL replay via convert_binary_to_format."""

    def _replay_to_jsonl(self, samples, interval=1000):
        """Record *samples* to a .bin file, convert to JSONL, parse lines."""
        bin_path = self.create_binary_file(samples, interval=interval)
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
            jsonl_path = f.name
        # Track for cleanup by the base class.
        self.temp_files.append(jsonl_path)

        convert_binary_to_format(bin_path, jsonl_path, "jsonl")

        with open(jsonl_path, "r", encoding="utf-8") as f:
            return [json.loads(line) for line in f]

    def test_binary_replay_to_jsonl_basic(self):
        """Replay a small .bin to JSONL: meta/end shape, samples_total, run_id."""
        frame = make_frame("hot.py", 99, "hot_func")
        samples = [
            [make_interpreter(0, [make_thread(1, [frame])])]
            for _ in range(5)
        ]
        records = self._replay_to_jsonl(samples, interval=2000)
        meta, _, frame_defs, _, end = jsonl_tables(records)

        self.assertEqual(meta["sample_interval_usec"], 2000)
        self.assertEqual(end["samples_total"], 5)

        # Every record in a run carries the same UUID4-hex run_id.
        run_ids = {r["run_id"] for r in records}
        self.assertEqual(len(run_ids), 1)
        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")

        self.assertEqual(len(frame_defs), 1)
        self.assertEqual(frame_defs[0]["line"], 99)

    def test_binary_replay_to_jsonl_rle_weight_propagation(self):
        """RLE-batched identical samples land as a single agg entry with the right total."""
        frame = make_frame("rle.py", 42, "rle_func")
        # 50 identical samples in a row so the binary writer RLE-batches them.
        samples = [
            [make_interpreter(0, [make_thread(1, [frame])])]
            for _ in range(50)
        ]
        records = self._replay_to_jsonl(samples)
        _, _, _, agg, end = jsonl_tables(records)

        self.assertEqual(end["samples_total"], 50)
        self.assertEqual(agg["entries"], [
            {"frame_id": 0, "self": 50, "cumulative": 50},
        ])

    def test_binary_replay_to_jsonl_omits_unavailable_columns(self):
        """Columns the binary recorder did not capture are omitted, not 0."""
        # make_frame defaults column/end_column to 0; pass column=-1 /
        # end_column=-1 so the binary side records LOCATION_NOT_AVAILABLE.
        frame = make_frame("nocol.py", 7, "no_col", column=-1, end_column=-1)
        samples = [[make_interpreter(0, [make_thread(1, [frame])])]]
        records = self._replay_to_jsonl(samples)
        _, _, frame_defs, _, _ = jsonl_tables(records)

        self.assertEqual(len(frame_defs), 1)
        fd = frame_defs[0]
        self.assertEqual(fd["line"], 7)
        # A missing key means "not available" per the JSONL schema.
        self.assertNotIn("col", fd)
        self.assertNotIn("end_col", fd)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
index c522c50d1fd5fa..9c0734ac804e1b 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
@@ -1,6 +1,7 @@
"""Tests for sampling profiler CLI argument parsing and functionality."""
import io
+import json
import os
import subprocess
import sys
@@ -21,9 +22,19 @@
requires_remote_subprocess_debugging,
)
-from profiling.sampling.cli import main
-from profiling.sampling.constants import PROFILING_MODE_ALL,
PROFILING_MODE_WALL
+from profiling.sampling.cli import (
+ FORMAT_EXTENSIONS,
+ _create_collector,
+ _generate_output_filename,
+ main,
+)
+from profiling.sampling.constants import (
+ PROFILING_MODE_ALL,
+ PROFILING_MODE_CPU,
+ PROFILING_MODE_WALL,
+)
from profiling.sampling.errors import SamplingScriptNotFoundError,
SamplingModuleNotFoundError, SamplingUnknownProcessError
+from profiling.sampling.jsonl_collector import JsonlCollector
class TestSampleProfilerCLI(unittest.TestCase):
def _setup_sync_mocks(self, mock_socket, mock_popen):
@@ -912,3 +923,65 @@ def test_cli_replay_reader_errors_exit_cleanly(self):
str(cm.exception),
"Error: Unsupported format version 2",
)
+
    def test_cli_jsonl_format_mutually_exclusive_with_pstats(self):
        """--jsonl and --pstats cannot be combined (mutually exclusive group)."""
        with (
            mock.patch(
                "sys.argv",
                [
                    "profiling.sampling.cli",
                    "attach",
                    "12345",
                    "--jsonl",
                    "--pstats",
                ],
            ),
            # Silence argparse's usage/error output.
            mock.patch("sys.stderr", io.StringIO()),
        ):
            # argparse exits on conflicting mutually-exclusive options.
            with self.assertRaises(SystemExit):
                main()
+
    def test_cli_jsonl_extension_in_format_extensions(self):
        """FORMAT_EXTENSIONS maps 'jsonl' -> 'jsonl' so default filenames work."""
        self.assertEqual(FORMAT_EXTENSIONS["jsonl"], "jsonl")
        # Default output name pattern is "<format>_<pid>.<ext>".
        self.assertEqual(_generate_output_filename("jsonl", 12345),
                         "jsonl_12345.jsonl")
+
    def test_cli_jsonl_create_collector_propagates_mode(self):
        """_create_collector('jsonl', ..., mode=X) lands X in the meta record."""
        collector = _create_collector(
            "jsonl",
            sample_interval_usec=1000,
            skip_idle=False,
            mode=PROFILING_MODE_CPU,
        )
        self.assertIsInstance(collector, JsonlCollector)

        # Export an (empty) profile and check the meta record's mode name.
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
            jsonl_path = f.name
        self.addCleanup(os.unlink, jsonl_path)
        collector.export(jsonl_path)
        with open(jsonl_path, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]
        meta = next(r for r in records if r["type"] == "meta")
        self.assertEqual(meta["mode"], "cpu")
+
    def test_cli_jsonl_rejects_opcodes_combination(self):
        """--opcodes is incompatible with --jsonl per opcodes_compatible_formats."""
        test_args = [
            "profiling.sampling.cli",
            "attach",
            "12345",
            "--jsonl",
            "--opcodes",
        ]
        with (
            mock.patch("sys.argv", test_args),
            mock.patch("sys.stderr", io.StringIO()) as mock_stderr,
            # Patch sample() so a valid invocation would not really attach.
            mock.patch("profiling.sampling.cli.sample"),
            self.assertRaises(SystemExit) as cm,
        ):
            main()

        # argparse error exit code is 2; the message names the bad option.
        self.assertEqual(cm.exception.code, 2)
        self.assertIn("--opcodes", mock_stderr.getvalue())
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 240ec8a195c43b..b42e7aa579f40c 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -16,6 +16,7 @@
CollapsedStackCollector,
FlamegraphCollector,
)
+ from profiling.sampling.jsonl_collector import JsonlCollector
from profiling.sampling.gecko_collector import GeckoCollector
from profiling.sampling.collector import extract_lineno, normalize_location
from profiling.sampling.opcode_utils import get_opcode_info, format_opcode
@@ -38,7 +39,7 @@
from test.support import captured_stdout, captured_stderr
from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo,
LocationInfo, make_diff_collector_with_mock_baseline
-from .helpers import close_and_unlink
+from .helpers import close_and_unlink, jsonl_tables
def resolve_name(node, strings):
@@ -1669,6 +1670,393 @@ def test_diff_flamegraph_load_baseline(self):
self.assertAlmostEqual(cold_node["diff"], -1.0)
self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
    def test_jsonl_collector_export_exact_output(self):
        # Pin the serialized byte stream exactly: record order, JSON key
        # order, and compact separators are all part of the JSONL contract.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)
        # Fixed run_id makes the expected output deterministic.
        collector.run_id = "run-123"

        test_frames1 = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1,
                        [
                            MockFrameInfo("file.py", 10, "func1"),
                            MockFrameInfo("file.py", 20, "func2"),
                        ],
                    )
                ],
            )
        ]
        test_frames2 = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1,
                        [
                            MockFrameInfo("file.py", 10, "func1"),
                            MockFrameInfo("file.py", 20, "func2"),
                        ],
                    )
                ],
            )
        ]  # Same stack
        test_frames3 = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1, [MockFrameInfo("other.py", 5, "other_func")]
                    )
                ],
            )
        ]

        collector.collect(test_frames1)
        collector.collect(test_frames2)
        collector.collect(test_frames3)

        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            content = f.read()

        self.assertEqual(
            content,
            (
                '{"type":"meta","v":0,"run_id":"run-123","sample_interval_usec":1000}\n'
                '{"type":"string_table","v":0,"run_id":"run-123","strings":[{"str_id":0,"value":"func1"},{"str_id":1,"value":"file.py"},{"str_id":2,"value":"func2"},{"str_id":3,"value":"other_func"},{"str_id":4,"value":"other.py"}]}\n'
                '{"type":"frame_table","v":0,"run_id":"run-123","frames":[{"frame_id":0,"path_str_id":1,"func_str_id":0,"line":10,"end_line":10},{"frame_id":1,"path_str_id":1,"func_str_id":2,"line":20,"end_line":20},{"frame_id":2,"path_str_id":4,"func_str_id":3,"line":5,"end_line":5}]}\n'
                '{"type":"agg","v":0,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":0,"self":2,"cumulative":2},{"frame_id":1,"self":0,"cumulative":2},{"frame_id":2,"self":1,"cumulative":1}]}\n'
                '{"type":"end","v":0,"run_id":"run-123","samples_total":3}\n'
            ),
        )
+
    def test_jsonl_collector_export_includes_mode_in_meta(self):
        # mode= constants are translated to their human-readable names
        # in the meta record.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000, mode=PROFILING_MODE_CPU)
        collector.collect(
            [
                MockInterpreterInfo(
                    0,
                    [
                        MockThreadInfo(
                            1, [MockFrameInfo("file.py", 10, "func")]
                        )
                    ],
                )
            ]
        )
        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        meta_record = next(
            record for record in records if record["type"] == "meta"
        )
        self.assertEqual(meta_record["mode"], "cpu")
+
    def test_jsonl_collector_export_empty_profile(self):
        # With zero samples the stream still contains meta and end —
        # and nothing else (no empty table/agg chunks).
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)
        collector.run_id = "run-123"
        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        self.assertEqual(
            [record["type"] for record in records], ["meta", "end"]
        )
        self.assertEqual(records[0]["sample_interval_usec"], 1000)
        self.assertEqual(records[0]["run_id"], "run-123")
        self.assertEqual(records[1]["samples_total"], 0)
        self.assertEqual(records[1]["run_id"], "run-123")
+
    def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
        # A frame appearing three times in one stack (recursion) must
        # contribute exactly once to cumulative for that sample.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)
        collector.collect(
            [
                MockInterpreterInfo(
                    0,
                    [
                        MockThreadInfo(
                            1,
                            [
                                MockFrameInfo(
                                    "recursive.py", 10, "recursive_func"
                                ),
                                MockFrameInfo(
                                    "recursive.py", 10, "recursive_func"
                                ),
                                MockFrameInfo(
                                    "recursive.py", 10, "recursive_func"
                                ),
                            ],
                        )
                    ],
                )
            ]
        )
        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        _, _, frame_defs, agg_record, end_record = jsonl_tables(records)
        # Identical frames are interned to a single frame definition.
        self.assertEqual(len(frame_defs), 1)
        self.assertEqual(
            agg_record["entries"],
            [
                {
                    "frame_id": frame_defs[0]["frame_id"],
                    "self": 1,
                    "cumulative": 1,
                }
            ],
        )
        self.assertEqual(agg_record["samples_total"], 1)
        self.assertEqual(end_record["samples_total"], 1)
+
    def test_jsonl_collector_skip_idle_filters_threads(self):
        # skip_idle=True must drop threads whose status is idle (0) while
        # keeping threads holding the GIL / on CPU.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        active_status = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
        frames = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1,
                        [MockFrameInfo("active1.py", 10, "active_func1")],
                        status=active_status,
                    ),
                    MockThreadInfo(
                        2,
                        [MockFrameInfo("idle.py", 20, "idle_func")],
                        status=0,
                    ),
                    MockThreadInfo(
                        3,
                        [MockFrameInfo("active2.py", 30, "active_func2")],
                        status=active_status,
                    ),
                ],
            )
        ]

        def export_summary(skip_idle):
            # Collect once with the given skip_idle and summarize which
            # paths/functions survived plus the sample total.
            collector = JsonlCollector(1000, skip_idle=skip_idle)
            collector.collect(frames)
            collector.export(jsonl_out.name)

            with open(jsonl_out.name, "r", encoding="utf-8") as f:
                records = [json.loads(line) for line in f]

            _, str_defs, frame_defs, agg_record, _ = jsonl_tables(records)
            paths = {str_defs[item["path_str_id"]] for item in frame_defs}
            funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
            return paths, funcs, agg_record["samples_total"]

        paths, funcs, samples_total = export_summary(skip_idle=True)
        self.assertEqual(paths, {"active1.py", "active2.py"})
        self.assertEqual(funcs, {"active_func1", "active_func2"})
        self.assertEqual(samples_total, 2)

        paths, funcs, samples_total = export_summary(skip_idle=False)
        self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"})
        self.assertEqual(funcs, {"active_func1", "idle_func", "active_func2"})
        self.assertEqual(samples_total, 3)
+
    def test_jsonl_collector_splits_large_exports_into_chunks(self):
        # 257 distinct frames -> 514 strings (file + func each), so the
        # 256-entry chunking yields [256, 256, 2] / [256, 1] / [256, 1].
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)

        for i in range(257):
            collector.collect(
                [
                    MockInterpreterInfo(
                        0,
                        [
                            MockThreadInfo(
                                1,
                                [
                                    MockFrameInfo(
                                        f"file{i}.py", i + 1, f"func{i}"
                                    )
                                ],
                            )
                        ],
                    )
                ]
            )

        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        # All chunks of one run share the same UUID4-hex run_id.
        run_ids = {record["run_id"] for record in records}
        self.assertEqual(len(run_ids), 1)
        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")

        _, str_defs, frame_defs, agg_record, end_record = jsonl_tables(
            records
        )
        str_chunks = [
            record for record in records if record["type"] == "string_table"
        ]
        frame_chunks = [
            record for record in records if record["type"] == "frame_table"
        ]
        agg_chunks = [record for record in records if record["type"] == "agg"]

        self.assertEqual(
            [len(record["strings"]) for record in str_chunks],
            [256, 256, 2],
        )
        self.assertEqual(
            [len(record["frames"]) for record in frame_chunks], [256, 1]
        )
        self.assertEqual(
            [len(record["entries"]) for record in agg_chunks], [256, 1]
        )
        self.assertEqual(len(str_defs), 514)
        self.assertEqual(len(frame_defs), 257)
        self.assertEqual(agg_record["samples_total"], 257)
        self.assertEqual(end_record["samples_total"], 257)
+
+ def test_jsonl_collector_respects_weight_for_rle_batched_samples(self):
+ """weight>1 (from binary replay RLE) is honored in self/cumulative."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ leaf = MockFrameInfo("file.py", 10, "leaf")
+ non_leaf = MockFrameInfo("file.py", 20, "non_leaf")
+
+ collector.process_frames([leaf, non_leaf], _thread_id=1, weight=5)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, str_defs, frame_defs, agg, end = jsonl_tables(records)
+ self.assertEqual(end["samples_total"], 5)
+ self.assertEqual(agg["samples_total"], 5)
+ self.assertEqual(
+ {str_defs[fd["func_str_id"]]: fd["frame_id"] for fd in frame_defs},
+ {"leaf": 0, "non_leaf": 1},
+ )
+ self.assertEqual(agg["entries"], [
+ {"frame_id": 0, "self": 5, "cumulative": 5},
+ {"frame_id": 1, "self": 0, "cumulative": 5},
+ ])
+
+ def test_jsonl_collector_recursion_with_weight(self):
+ """Recursion dedup respects weight, not occurrence count."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ recursive = MockFrameInfo("rec.py", 10, "f")
+
+ collector.process_frames([recursive] * 3, _thread_id=1, weight=3)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, _, frame_defs, agg, _ = jsonl_tables(records)
+ self.assertEqual(len(frame_defs), 1)
+ self.assertEqual(agg["entries"], [
+ {"frame_id": 0, "self": 3, "cumulative": 3},
+ ])
+
+ def test_jsonl_collector_emits_col_and_end_col_when_present(self):
+ """All four location fields are emitted when col/end_col are >= 0."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ frame = MockFrameInfo("test.py", 0, "f")
+ frame.location = LocationInfo(42, 45, 4, 12)
+ frames = [
+ MockInterpreterInfo(
+ 0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+ )
+ ]
+ collector.collect(frames)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+ self.assertEqual(frame_defs, [
+ {
+ "frame_id": 0,
+ "path_str_id": 1,
+ "func_str_id": 0,
+ "line": 42,
+ "end_line": 45,
+ "col": 4,
+ "end_col": 12,
+ },
+ ])
+ self.assertEqual(str_defs, {0: "f", 1: "test.py"})
+
+ def test_jsonl_collector_partial_location_elision(self):
+ """Negative col/end_col/end_line fields are individually elided."""
+ # _get_or_create_frame_id interns funcname before filename, so
+ # func_str_id=0 ("f") and path_str_id=1 ("test.py").
+ common = {"frame_id": 0, "path_str_id": 1, "func_str_id": 0}
+ cases = [
+ (LocationInfo(42, 45, -1, 12),
+ {**common, "line": 42, "end_line": 45, "end_col": 12}),
+ (LocationInfo(42, 45, 4, -1),
+ {**common, "line": 42, "end_line": 45, "col": 4}),
+ (LocationInfo(42, 0, 4, 8),
+ {**common, "line": 42, "col": 4, "end_col": 8}),
+ ]
+ for loc, expected_frame_def in cases:
+ with self.subTest(location=loc):
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ frame = MockFrameInfo("test.py", 0, "f")
+ frame.location = loc
+ frames = [
+ MockInterpreterInfo(
+ 0,
+ [MockThreadInfo(1, [frame],
status=THREAD_STATUS_HAS_GIL)],
+ )
+ ]
+ collector.collect(frames)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, _, frame_defs, _, _ = jsonl_tables(records)
+ self.assertEqual(frame_defs, [expected_frame_def])
+
class TestRecursiveFunctionHandling(unittest.TestCase):
"""Tests for correct handling of recursive functions in cumulative
stats."""
@@ -1878,6 +2266,20 @@ def test_extract_lineno_from_none(self):
"""Test extracting lineno from None (synthetic frames)."""
self.assertEqual(extract_lineno(None), 0)
+ def test_extract_lineno_from_int(self):
+ """Test extracting lineno from a bare integer line number.
+
+ Mirrors normalize_location's int contract so callers like the
+ collapsed/flamegraph collectors do not crash on a bare-int location.
+ """
+ self.assertEqual(extract_lineno(42), 42)
+ self.assertEqual(extract_lineno(0), 0)
+
+ def test_normalize_location_with_int(self):
+ """Test normalize_location expands a legacy integer line number."""
+ result = normalize_location(42)
+ self.assertEqual(result, (42, 42, -1, -1))
+
def test_normalize_location_with_location_info(self):
"""Test normalize_location passes through LocationInfo."""
loc = LocationInfo(10, 15, 0, 5)
@@ -2068,6 +2470,85 @@ def test_gecko_collector_with_location_info(self):
# Verify function name is in string table
self.assertIn("handle_request", string_array)
+ def test_jsonl_collector_with_location_info(self):
+ """Test JsonlCollector handles LocationInfo properly."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(sample_interval_usec=1000)
+
+ # Frame with LocationInfo
+ frame = MockFrameInfo("test.py", 42, "my_function")
+ frames = [
+ MockInterpreterInfo(
+ 0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+ )
+ ]
+ collector.collect(frames)
+
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ meta, str_defs, frame_defs, agg, end = jsonl_tables(records)
+ self.assertEqual(meta["sample_interval_usec"], 1000)
+ self.assertEqual(agg["samples_total"], 1)
+ self.assertEqual(end["samples_total"], 1)
+ self.assertEqual(len(frame_defs), 1)
+ self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "test.py")
+ self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "my_function")
+ self.assertEqual(
+ frame_defs[0],
+ {
+ "frame_id": 0,
+ "path_str_id": frame_defs[0]["path_str_id"],
+ "func_str_id": frame_defs[0]["func_str_id"],
+ "line": 42,
+ "end_line": 42,
+ },
+ )
+
+ def test_jsonl_collector_with_none_location(self):
+ """Test JsonlCollector handles None location (synthetic frames)."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(sample_interval_usec=1000)
+
+ # Create frame with None location (like GC frame)
+ frame = MockFrameInfo("~", 0, "<GC>")
+ frame.location = None # Synthetic frame has no location
+ frames = [
+ MockInterpreterInfo(
+ 0,
+ [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+ )
+ ]
+ collector.collect(frames)
+
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ meta, str_defs, frame_defs, agg, end = jsonl_tables(records)
+ self.assertEqual(meta["sample_interval_usec"], 1000)
+ self.assertEqual(agg["samples_total"], 1)
+ self.assertEqual(end["samples_total"], 1)
+ self.assertEqual(len(frame_defs), 1)
+ self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "~")
+ self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "<GC>")
+ self.assertEqual(
+ frame_defs[0],
+ {
+ "frame_id": 0,
+ "path_str_id": frame_defs[0]["path_str_id"],
+ "func_str_id": frame_defs[0]["func_str_id"],
+ "line": 0,
+ },
+ )
+
class TestOpcodeHandling(unittest.TestCase):
"""Tests for opcode field handling in collectors."""
@@ -2288,6 +2769,28 @@ def test_gecko_collector_frame_format(self):
# Should have recorded 3 functions
self.assertEqual(thread["funcTable"]["length"], 3)
+ def test_jsonl_collector_frame_format(self):
+ """Test JsonlCollector with 4-element frame format."""
+ collector = JsonlCollector(sample_interval_usec=1000)
+ collector.collect(self._make_sample_frames())
+
+ with tempfile.NamedTemporaryFile(delete=False) as f:
+ self.addClassCleanup(close_and_unlink, f)
+ collector.export(f.name)
+
+ with open(f.name, "r", encoding="utf-8") as fp:
+ records = [json.loads(line) for line in fp]
+
+ _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+
+ self.assertEqual(len(frame_defs), 3)
+
+ paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+ funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
+
+ self.assertEqual(paths, {"app.py", "utils.py", "lib.py"})
+ self.assertEqual(funcs, {"main", "helper", "process"})
+
class TestInternalFrameFiltering(unittest.TestCase):
"""Tests for filtering internal profiler frames from output."""
@@ -2415,3 +2918,42 @@ def
test_collapsed_stack_collector_filters_internal_frames(self):
for (call_tree, _), _ in collector.stack_counter.items():
for filename, _, _ in call_tree:
self.assertNotIn("_sync_coordinator", filename)
+
+ def test_jsonl_collector_filters_internal_frames(self):
+ """Test that JsonlCollector filters out internal frames."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ frames = [
+ MockInterpreterInfo(
+ 0,
+ [
+ MockThreadInfo(
+ 1,
+ [
+ MockFrameInfo("app.py", 50, "run"),
+ MockFrameInfo("/lib/_sync_coordinator.py", 100,
"main"),
+ MockFrameInfo("<frozen runpy>", 87, "_run_code"),
+ ],
+ status=THREAD_STATUS_HAS_GIL,
+ )
+ ],
+ )
+ ]
+
+ collector = JsonlCollector(sample_interval_usec=1000)
+ collector.collect(frames)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+
+ paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+
+ self.assertIn("app.py", paths)
+ self.assertIn("<frozen runpy>", paths)
+
+ for path in paths:
+ self.assertNotIn("_sync_coordinator", path)
diff --git
a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
new file mode 100644
index 00000000000000..636f45ae8d6c70
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -0,0 +1,4 @@
+The ``profiling.sampling`` module now supports JSONL output format via
+``--jsonl``. Each run emits a newline-delimited JSON file that is
+sequentially parseable by external tools, scripts, and programmatic
+consumers. Patch by Maurycy Pawłowski-Wieroński.
diff --git a/Modules/_remote_debugging/binary_io_reader.c
b/Modules/_remote_debugging/binary_io_reader.c
index 3ec4e0c77964c8..da3e7d55309c27 100644
--- a/Modules/_remote_debugging/binary_io_reader.c
+++ b/Modules/_remote_debugging/binary_io_reader.c
@@ -781,9 +781,9 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader
*reader,
if (frame->lineno != LOCATION_NOT_AVAILABLE) {
location = Py_BuildValue("(iiii)",
frame->lineno,
- frame->end_lineno != LOCATION_NOT_AVAILABLE ?
frame->end_lineno : frame->lineno,
- frame->column != LOCATION_NOT_AVAILABLE ? frame->column : 0,
- frame->end_column != LOCATION_NOT_AVAILABLE ?
frame->end_column : 0);
+ frame->end_lineno,
+ frame->column,
+ frame->end_column);
if (!location) {
Py_DECREF(frame_info);
goto error;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]