https://github.com/python/cpython/commit/04ce31852260b3d39e35286c1b6a134a3c475b22
commit: 04ce31852260b3d39e35286c1b6a134a3c475b22
branch: main
author: Maurycy Pawłowski-Wieroński <[email protected]>
committer: pablogsal <[email protected]>
date: 2026-05-05T00:44:37Z
summary:
gh-146256: Add `--jsonl` collector to the `profiling.sampling` (#146257)
files:
A Lib/profiling/sampling/jsonl_collector.py
A Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
M Lib/profiling/sampling/__init__.py
M Lib/profiling/sampling/binary_reader.py
M Lib/profiling/sampling/cli.py
M Lib/profiling/sampling/collector.py
M Lib/profiling/sampling/constants.py
M Lib/test/test_profiling/test_sampling_profiler/helpers.py
M Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
M Lib/test/test_profiling/test_sampling_profiler/test_cli.py
M Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
M Modules/_remote_debugging/binary_io_reader.c
diff --git a/Lib/profiling/sampling/__init__.py
b/Lib/profiling/sampling/__init__.py
index 6a0bb5e5c2f387..71579a3903253e 100644
--- a/Lib/profiling/sampling/__init__.py
+++ b/Lib/profiling/sampling/__init__.py
@@ -9,6 +9,15 @@
from .stack_collector import CollapsedStackCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
+from .jsonl_collector import JsonlCollector
from .string_table import StringTable
-__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector",
"HeatmapCollector", "GeckoCollector", "StringTable")
+__all__ = (
+ "Collector",
+ "PstatsCollector",
+ "CollapsedStackCollector",
+ "HeatmapCollector",
+ "GeckoCollector",
+ "JsonlCollector",
+ "StringTable",
+)
diff --git a/Lib/profiling/sampling/binary_reader.py
b/Lib/profiling/sampling/binary_reader.py
index a11be3652597a6..a29dad91ae339d 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -4,6 +4,7 @@
from .gecko_collector import GeckoCollector
from .stack_collector import FlamegraphCollector, CollapsedStackCollector
+from .jsonl_collector import JsonlCollector
from .pstats_collector import PstatsCollector
@@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file,
output_format,
collector = PstatsCollector(interval)
elif output_format == 'gecko':
collector = GeckoCollector(interval)
+ elif output_format == "jsonl":
+ collector = JsonlCollector(interval)
else:
raise ValueError(f"Unknown output format: {output_format}")
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index 9900415ae8a927..0648713edc52af 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -20,6 +20,7 @@
from .stack_collector import CollapsedStackCollector, FlamegraphCollector,
DiffFlamegraphCollector
from .heatmap_collector import HeatmapCollector
from .gecko_collector import GeckoCollector
+from .jsonl_collector import JsonlCollector
from .binary_collector import BinaryCollector
from .binary_reader import BinaryReader
from .constants import (
@@ -101,6 +102,7 @@ def __call__(self, parser, namespace, values,
option_string=None):
"diff_flamegraph": "html",
"gecko": "json",
"heatmap": "html",
+ "jsonl": "jsonl",
"binary": "bin",
}
@@ -111,6 +113,7 @@ def __call__(self, parser, namespace, values,
option_string=None):
"diff_flamegraph": DiffFlamegraphCollector,
"gecko": GeckoCollector,
"heatmap": HeatmapCollector,
+ "jsonl": JsonlCollector,
"binary": BinaryCollector,
}
@@ -488,6 +491,13 @@ def _add_format_options(parser, include_compression=True,
include_binary=True):
action=DiffFlamegraphAction,
help="Generate differential flamegraph comparing current profile to
`BASELINE` binary file",
)
+ format_group.add_argument(
+ "--jsonl",
+ action="store_const",
+ const="jsonl",
+ dest="format",
+ help="Generate newline-delimited JSON (JSONL) for programmatic
consumers",
+ )
if include_binary:
format_group.add_argument(
"--binary",
@@ -611,15 +621,18 @@ def _sort_to_mode(sort_choice):
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
def _create_collector(format_type, sample_interval_usec, skip_idle,
opcodes=False,
- output_file=None, compression='auto',
diff_baseline=None):
+ mode=None, output_file=None, compression='auto',
+ diff_baseline=None):
"""Create the appropriate collector based on format type.
Args:
- format_type: The output format ('pstats', 'collapsed', 'flamegraph',
'gecko', 'heatmap', 'binary', 'diff_flamegraph')
+ format_type: The output format ('pstats', 'collapsed', 'flamegraph',
+ 'gecko', 'heatmap', 'jsonl', 'binary', 'diff_flamegraph')
sample_interval_usec: Sampling interval in microseconds
skip_idle: Whether to skip idle samples
opcodes: Whether to collect opcode information (only used by gecko
format
for creating interval markers in Firefox Profiler)
+ mode: Profiling mode for collectors that expose it in metadata
output_file: Output file path (required for binary format)
compression: Compression type for binary format ('auto', 'zstd',
'none')
diff_baseline: Path to baseline binary file for differential flamegraph
@@ -655,6 +668,11 @@ def _create_collector(format_type, sample_interval_usec,
skip_idle, opcodes=Fals
skip_idle = False
return collector_class(sample_interval_usec, skip_idle=skip_idle,
opcodes=opcodes)
+ if format_type == "jsonl":
+ return collector_class(
+ sample_interval_usec, skip_idle=skip_idle, mode=mode
+ )
+
return collector_class(sample_interval_usec, skip_idle=skip_idle)
@@ -1142,7 +1160,7 @@ def _handle_attach(args):
# Create the appropriate collector
collector = _create_collector(
- args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+ args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
output_file=output_file,
compression=getattr(args, 'compression', 'auto'),
diff_baseline=args.diff_baseline
@@ -1249,7 +1267,7 @@ def _handle_run(args):
# Create the appropriate collector
collector = _create_collector(
- args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+ args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
output_file=output_file,
compression=getattr(args, 'compression', 'auto'),
diff_baseline=args.diff_baseline
diff --git a/Lib/profiling/sampling/collector.py
b/Lib/profiling/sampling/collector.py
index 08759b611696b7..81ec6344ebdea4 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -20,13 +20,16 @@ def normalize_location(location):
"""Normalize location to a 4-tuple format.
Args:
- location: tuple (lineno, end_lineno, col_offset, end_col_offset) or
None
+ location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+ an integer line number, or None
Returns:
tuple: (lineno, end_lineno, col_offset, end_col_offset)
"""
if location is None:
return DEFAULT_LOCATION
+ if isinstance(location, int):
+ return (location, location, -1, -1)
return location
@@ -34,13 +37,16 @@ def extract_lineno(location):
"""Extract lineno from location.
Args:
- location: tuple (lineno, end_lineno, col_offset, end_col_offset) or
None
+ location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+ an integer line number, or None
Returns:
int: The line number (0 for synthetic frames)
"""
if location is None:
return 0
+ if isinstance(location, int):
+ return location
return location[0]
def _is_internal_frame(frame):
diff --git a/Lib/profiling/sampling/constants.py
b/Lib/profiling/sampling/constants.py
index a364d0b8fde1e0..d7c710f943b1b7 100644
--- a/Lib/profiling/sampling/constants.py
+++ b/Lib/profiling/sampling/constants.py
@@ -11,6 +11,14 @@
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
PROFILING_MODE_EXCEPTION = 4 # Only samples when thread has an active
exception
+PROFILING_MODE_NAMES = {
+ PROFILING_MODE_WALL: "wall",
+ PROFILING_MODE_CPU: "cpu",
+ PROFILING_MODE_GIL: "gil",
+ PROFILING_MODE_ALL: "all",
+ PROFILING_MODE_EXCEPTION: "exception",
+}
+
# Sort mode constants
SORT_MODE_NSAMPLES = 0
SORT_MODE_TOTTIME = 1
diff --git a/Lib/profiling/sampling/jsonl_collector.py
b/Lib/profiling/sampling/jsonl_collector.py
new file mode 100644
index 00000000000000..7d26129b80de86
--- /dev/null
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -0,0 +1,266 @@
+"""JSON Lines (JSONL) collector for the sampling profiler.
+
+Emits a normalized newline-delimited JSON record stream suitable for
+programmatic consumption by external tools, scripts, and agents. Each line
+is one JSON object; consumers can parse the file incrementally line by
+line, but the producer writes the whole file at the end of the run (it is
+not a live/streaming producer).
+
+Record schema
+=============
+
+Every record is a JSON object with at least ``"type"``, ``"v"`` (record
+schema version), and ``"run_id"`` (UUID4 hex tagging the run; allows
+demultiplexing concatenated streams). Records appear in this fixed order:
+
+1. ``meta`` (exactly one, first line)::
+
+ {"type":"meta","v":0,"run_id":"<hex>",
+ "sample_interval_usec":<int>,"mode":"wall|cpu|gil|all|exception"}
+
+ ``mode`` is omitted when not provided.
+
+2. ``string_table`` (zero or more)::
+
+ {"type":"string_table","v":0,"run_id":"<hex>",
+ "strings":[{"str_id":<int>,"value":"<str>"}, ...]}
+
+ Strings (filenames, function names) are interned to keep repeated values
+ compact. IDs are zero-based. Each chunk holds up to ``_CHUNK_SIZE``
+ entries, and each entry carries its explicit ``str_id`` so consumers do
+ not need to infer offsets across chunks.
+
+3. ``frame_table`` (zero or more)::
+
+ {"type":"frame_table","v":0,"run_id":"<hex>",
+ "frames":[{"frame_id":<int>,"path_str_id":<int>,"func_str_id":<int>,
+ "line":<int>,"end_line":<int>,"col":<int>,
+ "end_col":<int>}, ...]}
+
+ ``end_line``/``col``/``end_col`` are *omitted* when source location data
+ is unavailable (a missing key means "not available", not zero or null).
+ ``line`` is ``0`` for synthetic frames (for example, internal marker
+ frames whose source location is None). Frame IDs are zero-based.
+
+4. ``agg`` (zero or more)::
+
+ {"type":"agg","v":0,"run_id":"<hex>","kind":"frame","scope":"final",
+ "samples_total":<int>,
+ "entries":[{"frame_id":<int>,"self":<int>,"cumulative":<int>}, ...]}
+
+ ``self`` counts samples where the frame was the leaf (currently
+ executing); ``cumulative`` counts samples where the frame appeared
+ anywhere in the stack (deduped per sample so recursion does not
+ double-count). ``samples_total`` is the run-wide total, repeated on
+ each chunk so a streaming consumer always knows the denominator.
+
+5. ``end`` (exactly one, last line)::
+
+ {"type":"end","v":0,"run_id":"<hex>","samples_total":<int>}
+
+ Presence of ``end`` is the consumer's signal that the file is complete.
+
+Forward compatibility
+=====================
+
+Consumers MUST ignore unknown record ``"type"`` values and unknown object
+fields. New fields will be added by adding optional keys; an incompatible
+schema change will bump the per-record ``"v"``.
+"""
+
+from collections import Counter
+import json
+import uuid
+from itertools import batched
+
+from .constants import PROFILING_MODE_NAMES
+from .collector import normalize_location
+from .stack_collector import StackTraceCollector
+
+
# Maximum number of entries per string_table/frame_table/agg record chunk;
# keeps individual JSONL lines bounded in size for incremental consumers.
_CHUNK_SIZE = 256
# Per-record schema version (the "v" field); bumped only on an
# incompatible schema change (see module docstring).
_SCHEMA_VERSION = 0
+
+
class JsonlCollector(StackTraceCollector):
    """Collector that exports finalized profiling data as JSONL.

    See the module docstring for the full record schema. The collector
    accumulates samples in memory and writes the complete file at
    ``export()`` time.
    """

    def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
        """Initialize empty interning tables and aggregation counters.

        Args:
            sample_interval_usec: sampling interval, echoed in the ``meta``
                record
            skip_idle: forwarded to the base collector (skip idle threads)
            mode: PROFILING_MODE_* constant or None; named in ``meta``
                when provided
        """
        super().__init__(sample_interval_usec, skip_idle=skip_idle)
        # UUID4 hex tagging every record of this run so consumers can
        # demultiplex concatenated streams.
        self.run_id = uuid.uuid4().hex

        # String interning: value -> str_id, plus the ordered records
        # emitted as string_table chunks.
        self._string_to_id = {}
        self._strings = []

        # Frame interning: dedup key -> frame_id, plus the ordered
        # records emitted as frame_table chunks.
        self._frame_to_id = {}
        self._frames = []

        # Aggregation state: leaf ("self") counts, per-sample-deduped
        # cumulative counts, and the run-wide sample total.
        self._frame_self = Counter()
        self._frame_cumulative = Counter()
        self._samples_total = 0
        # Scratch set reused across process_frames calls to dedupe
        # recursive frames within a single sample.
        self._seen_frame_ids = set()

        self._mode = mode

    def process_frames(self, frames, _thread_id, weight=1):
        """Fold one sampled stack (leaf first) into the aggregates.

        ``weight`` > 1 occurs when replaying RLE-batched binary samples;
        it scales both the self and cumulative counts.
        """
        self._samples_total += weight
        self._seen_frame_ids.clear()

        for i, (filename, location, funcname, _opcode) in enumerate(frames):
            frame_id = self._get_or_create_frame_id(
                filename, location, funcname
            )
            # Frame 0 is the currently-executing (leaf) frame.
            is_leaf = i == 0
            # Count cumulative at most once per sample so recursion does
            # not double-count a frame that appears multiple times.
            count_cumulative = frame_id not in self._seen_frame_ids

            if count_cumulative:
                self._seen_frame_ids.add(frame_id)

            if is_leaf:
                self._frame_self[frame_id] += weight

            if count_cumulative:
                self._frame_cumulative[frame_id] += weight

    def export(self, filename):
        """Write the complete JSONL stream to *filename*.

        Record order is fixed: meta, string_table chunks, frame_table
        chunks, agg chunks, end (see module docstring).
        """
        with open(filename, "w", encoding="utf-8") as output:
            self._write_message(output, self._build_meta_record())
            self._write_chunked_records(
                output,
                {
                    "type": "string_table",
                    "v": _SCHEMA_VERSION,
                    "run_id": self.run_id,
                },
                "strings",
                self._strings,
            )
            self._write_chunked_records(
                output,
                {
                    "type": "frame_table",
                    "v": _SCHEMA_VERSION,
                    "run_id": self.run_id,
                },
                "frames",
                self._frames,
            )
            self._write_chunked_records(
                output,
                {
                    "type": "agg",
                    "v": _SCHEMA_VERSION,
                    "run_id": self.run_id,
                    "kind": "frame",
                    "scope": "final",
                    # Repeated on each chunk so a streaming consumer
                    # always knows the denominator.
                    "samples_total": self._samples_total,
                },
                "entries",
                self._iter_final_agg_entries(),
            )
            self._write_message(output, self._build_end_record())

    def _build_meta_record(self):
        """Return the single ``meta`` record (first line of the file)."""
        record = {
            "type": "meta",
            "v": _SCHEMA_VERSION,
            "run_id": self.run_id,
            "sample_interval_usec": self.sample_interval_usec,
        }

        # "mode" is omitted entirely when not provided; unknown numeric
        # modes fall back to their string form.
        if self._mode is not None:
            record["mode"] = PROFILING_MODE_NAMES.get(
                self._mode, str(self._mode)
            )

        return record

    def _build_end_record(self):
        """Return the single ``end`` record (last line; completion marker)."""
        record = {
            "type": "end",
            "v": _SCHEMA_VERSION,
            "run_id": self.run_id,
            "samples_total": self._samples_total,
        }

        return record

    def _iter_final_agg_entries(self):
        """Yield one agg entry per interned frame, in frame_id order."""
        for frame_record in self._frames:
            frame_id = frame_record["frame_id"]
            yield {
                "frame_id": frame_id,
                "self": self._frame_self[frame_id],
                "cumulative": self._frame_cumulative[frame_id],
            }

    def _get_or_create_frame_id(self, filename, location, funcname):
        """Intern a frame and return its zero-based frame_id."""
        location_fields = self._location_to_export_fields(location)
        # Interning order (funcname before filename) determines str_id
        # assignment and is part of the observable output.
        func_str_id = self._intern_string(funcname)
        path_str_id = self._intern_string(filename)

        # Dedup key includes optional location fields (None when omitted)
        # so frames differing only in available location data stay distinct.
        frame_key = (
            path_str_id,
            func_str_id,
            location_fields["line"],
            location_fields.get("end_line"),
            location_fields.get("col"),
            location_fields.get("end_col"),
        )

        if (frame_id := self._frame_to_id.get(frame_key)) is not None:
            return frame_id

        frame_id = len(self._frames)
        frame_record = {
            "frame_id": frame_id,
            "path_str_id": path_str_id,
            "func_str_id": func_str_id,
            **location_fields,
        }

        self._frame_to_id[frame_key] = frame_id
        self._frames.append(frame_record)
        return frame_id

    def _intern_string(self, value):
        """Intern *value* and return its zero-based str_id."""
        value = str(value)

        if (string_id := self._string_to_id.get(value)) is not None:
            return string_id

        string_id = len(self._strings)
        self._string_to_id[value] = string_id
        self._strings.append({"str_id": string_id, "value": value})
        return string_id

    @staticmethod
    def _location_to_export_fields(location):
        """Map a location to the frame-record fields, omitting unknowns.

        A missing key means "not available" (see module docstring), so
        end_line/col/end_col are only emitted when their normalized
        values indicate real data.
        """
        lineno, end_lineno, col_offset, end_col_offset = normalize_location(
            location
        )

        fields = {"line": lineno}
        if end_lineno > 0:
            fields["end_line"] = end_lineno
        if col_offset >= 0:
            fields["col"] = col_offset
        if end_col_offset >= 0:
            fields["end_col"] = end_col_offset
        return fields

    def _write_chunked_records(
        self, output, base_record, chunk_field, entries
    ):
        """Write *entries* as records of at most _CHUNK_SIZE items each."""
        for chunk in batched(entries, _CHUNK_SIZE):
            self._write_message(output, {**base_record, chunk_field: chunk})

    @staticmethod
    def _write_message(output, record):
        """Serialize one record as a compact JSON line."""
        # Compact separators keep the stream small; insertion order of
        # the dict is preserved in the output.
        output.write(json.dumps(record, separators=(",", ":")))
        output.write("\n")
diff --git a/Lib/test/test_profiling/test_sampling_profiler/helpers.py
b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
index 0e32d8dd9eabef..b07776d415bb29 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/helpers.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/helpers.py
@@ -174,3 +174,29 @@ def close_and_unlink(file):
"""Close a file and unlink it from the filesystem."""
file.close()
unlink(file.name)
+
+
def jsonl_tables(records):
    """Split a parsed JSONL profile into its canonical sections.

    Returns ``(meta, str_defs, frame_defs, agg, end)``:

    * ``meta``/``end``/``agg`` are the first record of each type
      (raises ``StopIteration`` when one is absent);
    * ``str_defs`` maps ``str_id`` to its string value across all
      ``string_table`` chunks;
    * ``frame_defs`` is the flat list of frame definitions across all
      ``frame_table`` chunks.

    Using only the first agg record is sufficient for tests that emit a
    single chunk.
    """
    def first_record(kind):
        return next(rec for rec in records if rec["type"] == kind)

    meta = first_record("meta")
    end = first_record("end")
    agg = first_record("agg")

    str_defs = {}
    frame_defs = []
    for rec in records:
        if rec["type"] == "string_table":
            for entry in rec["strings"]:
                str_defs[entry["str_id"]] = entry["value"]
        elif rec["type"] == "frame_table":
            frame_defs.extend(rec["frames"])

    return meta, str_defs, frame_defs, agg, end
diff --git
a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
index 7e6cb724c407e3..ca6cb6befaed24 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py
@@ -1,5 +1,6 @@
"""Tests for binary format round-trip functionality."""
+import json
import os
import random
import tempfile
@@ -21,7 +22,7 @@
THREAD_STATUS_MAIN_THREAD,
)
from profiling.sampling.binary_collector import BinaryCollector
- from profiling.sampling.binary_reader import BinaryReader
+ from profiling.sampling.binary_reader import BinaryReader,
convert_binary_to_format
from profiling.sampling.gecko_collector import GeckoCollector
ZSTD_AVAILABLE = _remote_debugging.zstd_available()
@@ -30,6 +31,8 @@
"Test only runs when _remote_debugging is available"
)
+from .helpers import jsonl_tables
+
def make_frame(filename, lineno, funcname, end_lineno=None, column=None,
end_column=None, opcode=None):
@@ -1343,5 +1346,70 @@ def test_timestamp_preservation_with_rle(self):
self.assertEqual(ts_collector.all_timestamps, expected_timestamps)
class TestBinaryReplayToJsonl(BinaryFormatTestBase):
    """Tests for binary -> JSONL replay via convert_binary_to_format."""

    def _replay_to_jsonl(self, samples, interval=1000):
        """Record *samples* to a .bin file, convert to JSONL, parse lines."""
        bin_path = self.create_binary_file(samples, interval=interval)
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
            jsonl_path = f.name
        # Track for cleanup by the base class.
        self.temp_files.append(jsonl_path)

        convert_binary_to_format(bin_path, jsonl_path, "jsonl")

        with open(jsonl_path, "r", encoding="utf-8") as f:
            return [json.loads(line) for line in f]

    def test_binary_replay_to_jsonl_basic(self):
        """Replay a small .bin to JSONL: meta/end shape, samples_total, run_id."""
        frame = make_frame("hot.py", 99, "hot_func")
        samples = [
            [make_interpreter(0, [make_thread(1, [frame])])]
            for _ in range(5)
        ]
        records = self._replay_to_jsonl(samples, interval=2000)
        meta, _, frame_defs, _, end = jsonl_tables(records)

        self.assertEqual(meta["sample_interval_usec"], 2000)
        self.assertEqual(end["samples_total"], 5)

        # Every record in a run carries the same UUID4-hex run_id.
        run_ids = {r["run_id"] for r in records}
        self.assertEqual(len(run_ids), 1)
        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")

        self.assertEqual(len(frame_defs), 1)
        self.assertEqual(frame_defs[0]["line"], 99)

    def test_binary_replay_to_jsonl_rle_weight_propagation(self):
        """RLE-batched identical samples land as a single agg entry with the right total."""
        frame = make_frame("rle.py", 42, "rle_func")
        # 50 identical samples in a row so the binary writer RLE-batches them.
        samples = [
            [make_interpreter(0, [make_thread(1, [frame])])]
            for _ in range(50)
        ]
        records = self._replay_to_jsonl(samples)
        _, _, _, agg, end = jsonl_tables(records)

        self.assertEqual(end["samples_total"], 50)
        self.assertEqual(agg["entries"], [
            {"frame_id": 0, "self": 50, "cumulative": 50},
        ])

    def test_binary_replay_to_jsonl_omits_unavailable_columns(self):
        """Columns the binary recorder did not capture are omitted, not 0."""
        # make_frame defaults column/end_column to 0; pass column=-1 /
        # end_column=-1 so the binary side records LOCATION_NOT_AVAILABLE.
        frame = make_frame("nocol.py", 7, "no_col", column=-1, end_column=-1)
        samples = [[make_interpreter(0, [make_thread(1, [frame])])]]
        records = self._replay_to_jsonl(samples)
        _, _, frame_defs, _, _ = jsonl_tables(records)

        self.assertEqual(len(frame_defs), 1)
        fd = frame_defs[0]
        self.assertEqual(fd["line"], 7)
        # A missing key means "not available" per the JSONL schema.
        self.assertNotIn("col", fd)
        self.assertNotIn("end_col", fd)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
index c522c50d1fd5fa..9c0734ac804e1b 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py
@@ -1,6 +1,7 @@
"""Tests for sampling profiler CLI argument parsing and functionality."""
import io
+import json
import os
import subprocess
import sys
@@ -21,9 +22,19 @@
requires_remote_subprocess_debugging,
)
-from profiling.sampling.cli import main
-from profiling.sampling.constants import PROFILING_MODE_ALL,
PROFILING_MODE_WALL
+from profiling.sampling.cli import (
+ FORMAT_EXTENSIONS,
+ _create_collector,
+ _generate_output_filename,
+ main,
+)
+from profiling.sampling.constants import (
+ PROFILING_MODE_ALL,
+ PROFILING_MODE_CPU,
+ PROFILING_MODE_WALL,
+)
from profiling.sampling.errors import SamplingScriptNotFoundError,
SamplingModuleNotFoundError, SamplingUnknownProcessError
+from profiling.sampling.jsonl_collector import JsonlCollector
class TestSampleProfilerCLI(unittest.TestCase):
def _setup_sync_mocks(self, mock_socket, mock_popen):
@@ -912,3 +923,65 @@ def test_cli_replay_reader_errors_exit_cleanly(self):
str(cm.exception),
"Error: Unsupported format version 2",
)
+
    def test_cli_jsonl_format_mutually_exclusive_with_pstats(self):
        """--jsonl and --pstats cannot be combined (mutually exclusive group)."""
        with (
            mock.patch(
                "sys.argv",
                [
                    "profiling.sampling.cli",
                    "attach",
                    "12345",
                    "--jsonl",
                    "--pstats",
                ],
            ),
            # Silence argparse's usage/error output.
            mock.patch("sys.stderr", io.StringIO()),
        ):
            # argparse exits on conflicting mutually-exclusive options.
            with self.assertRaises(SystemExit):
                main()
+
    def test_cli_jsonl_extension_in_format_extensions(self):
        """FORMAT_EXTENSIONS maps 'jsonl' -> 'jsonl' so default filenames work."""
        self.assertEqual(FORMAT_EXTENSIONS["jsonl"], "jsonl")
        # Default output name pattern is "<format>_<pid>.<ext>".
        self.assertEqual(_generate_output_filename("jsonl", 12345),
                         "jsonl_12345.jsonl")
+
    def test_cli_jsonl_create_collector_propagates_mode(self):
        """_create_collector('jsonl', ..., mode=X) lands X in the meta record."""
        collector = _create_collector(
            "jsonl",
            sample_interval_usec=1000,
            skip_idle=False,
            mode=PROFILING_MODE_CPU,
        )
        self.assertIsInstance(collector, JsonlCollector)

        # Export an (empty) profile and check the meta record's mode name.
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as f:
            jsonl_path = f.name
        self.addCleanup(os.unlink, jsonl_path)
        collector.export(jsonl_path)
        with open(jsonl_path, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]
        meta = next(r for r in records if r["type"] == "meta")
        self.assertEqual(meta["mode"], "cpu")
+
    def test_cli_jsonl_rejects_opcodes_combination(self):
        """--opcodes is incompatible with --jsonl per opcodes_compatible_formats."""
        test_args = [
            "profiling.sampling.cli",
            "attach",
            "12345",
            "--jsonl",
            "--opcodes",
        ]
        with (
            mock.patch("sys.argv", test_args),
            mock.patch("sys.stderr", io.StringIO()) as mock_stderr,
            # Patch sample() so a valid invocation would not really attach.
            mock.patch("profiling.sampling.cli.sample"),
            self.assertRaises(SystemExit) as cm,
        ):
            main()

        # argparse error exit code is 2; the message names the bad option.
        self.assertEqual(cm.exception.code, 2)
        self.assertIn("--opcodes", mock_stderr.getvalue())
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 240ec8a195c43b..b42e7aa579f40c 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -16,6 +16,7 @@
CollapsedStackCollector,
FlamegraphCollector,
)
+ from profiling.sampling.jsonl_collector import JsonlCollector
from profiling.sampling.gecko_collector import GeckoCollector
from profiling.sampling.collector import extract_lineno, normalize_location
from profiling.sampling.opcode_utils import get_opcode_info, format_opcode
@@ -38,7 +39,7 @@
from test.support import captured_stdout, captured_stderr
from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo,
LocationInfo, make_diff_collector_with_mock_baseline
-from .helpers import close_and_unlink
+from .helpers import close_and_unlink, jsonl_tables
def resolve_name(node, strings):
@@ -1669,6 +1670,393 @@ def test_diff_flamegraph_load_baseline(self):
self.assertAlmostEqual(cold_node["diff"], -1.0)
self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
    def test_jsonl_collector_export_exact_output(self):
        # Pin the serialized byte stream exactly: record order, JSON key
        # order, and compact separators are all part of the JSONL contract.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)
        # Fixed run_id makes the expected output deterministic.
        collector.run_id = "run-123"

        test_frames1 = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1,
                        [
                            MockFrameInfo("file.py", 10, "func1"),
                            MockFrameInfo("file.py", 20, "func2"),
                        ],
                    )
                ],
            )
        ]
        test_frames2 = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1,
                        [
                            MockFrameInfo("file.py", 10, "func1"),
                            MockFrameInfo("file.py", 20, "func2"),
                        ],
                    )
                ],
            )
        ]  # Same stack
        test_frames3 = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1, [MockFrameInfo("other.py", 5, "other_func")]
                    )
                ],
            )
        ]

        collector.collect(test_frames1)
        collector.collect(test_frames2)
        collector.collect(test_frames3)

        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            content = f.read()

        self.assertEqual(
            content,
            (
                '{"type":"meta","v":0,"run_id":"run-123","sample_interval_usec":1000}\n'
                '{"type":"string_table","v":0,"run_id":"run-123","strings":[{"str_id":0,"value":"func1"},{"str_id":1,"value":"file.py"},{"str_id":2,"value":"func2"},{"str_id":3,"value":"other_func"},{"str_id":4,"value":"other.py"}]}\n'
                '{"type":"frame_table","v":0,"run_id":"run-123","frames":[{"frame_id":0,"path_str_id":1,"func_str_id":0,"line":10,"end_line":10},{"frame_id":1,"path_str_id":1,"func_str_id":2,"line":20,"end_line":20},{"frame_id":2,"path_str_id":4,"func_str_id":3,"line":5,"end_line":5}]}\n'
                '{"type":"agg","v":0,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":0,"self":2,"cumulative":2},{"frame_id":1,"self":0,"cumulative":2},{"frame_id":2,"self":1,"cumulative":1}]}\n'
                '{"type":"end","v":0,"run_id":"run-123","samples_total":3}\n'
            ),
        )
+
    def test_jsonl_collector_export_includes_mode_in_meta(self):
        # mode= constants are translated to their human-readable names
        # in the meta record.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000, mode=PROFILING_MODE_CPU)
        collector.collect(
            [
                MockInterpreterInfo(
                    0,
                    [
                        MockThreadInfo(
                            1, [MockFrameInfo("file.py", 10, "func")]
                        )
                    ],
                )
            ]
        )
        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        meta_record = next(
            record for record in records if record["type"] == "meta"
        )
        self.assertEqual(meta_record["mode"], "cpu")
+
    def test_jsonl_collector_export_empty_profile(self):
        # With zero samples the stream still contains meta and end —
        # and nothing else (no empty table/agg chunks).
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)
        collector.run_id = "run-123"
        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        self.assertEqual(
            [record["type"] for record in records], ["meta", "end"]
        )
        self.assertEqual(records[0]["sample_interval_usec"], 1000)
        self.assertEqual(records[0]["run_id"], "run-123")
        self.assertEqual(records[1]["samples_total"], 0)
        self.assertEqual(records[1]["run_id"], "run-123")
+
    def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
        # A frame appearing three times in one stack (recursion) must
        # contribute exactly once to cumulative for that sample.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)
        collector.collect(
            [
                MockInterpreterInfo(
                    0,
                    [
                        MockThreadInfo(
                            1,
                            [
                                MockFrameInfo(
                                    "recursive.py", 10, "recursive_func"
                                ),
                                MockFrameInfo(
                                    "recursive.py", 10, "recursive_func"
                                ),
                                MockFrameInfo(
                                    "recursive.py", 10, "recursive_func"
                                ),
                            ],
                        )
                    ],
                )
            ]
        )
        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        _, _, frame_defs, agg_record, end_record = jsonl_tables(records)
        # Identical frames are interned to a single frame definition.
        self.assertEqual(len(frame_defs), 1)
        self.assertEqual(
            agg_record["entries"],
            [
                {
                    "frame_id": frame_defs[0]["frame_id"],
                    "self": 1,
                    "cumulative": 1,
                }
            ],
        )
        self.assertEqual(agg_record["samples_total"], 1)
        self.assertEqual(end_record["samples_total"], 1)
+
    def test_jsonl_collector_skip_idle_filters_threads(self):
        # skip_idle=True must drop threads whose status is idle (0) while
        # keeping threads holding the GIL / on CPU.
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        active_status = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
        frames = [
            MockInterpreterInfo(
                0,
                [
                    MockThreadInfo(
                        1,
                        [MockFrameInfo("active1.py", 10, "active_func1")],
                        status=active_status,
                    ),
                    MockThreadInfo(
                        2,
                        [MockFrameInfo("idle.py", 20, "idle_func")],
                        status=0,
                    ),
                    MockThreadInfo(
                        3,
                        [MockFrameInfo("active2.py", 30, "active_func2")],
                        status=active_status,
                    ),
                ],
            )
        ]

        def export_summary(skip_idle):
            # Collect once with the given skip_idle and summarize which
            # paths/functions survived plus the sample total.
            collector = JsonlCollector(1000, skip_idle=skip_idle)
            collector.collect(frames)
            collector.export(jsonl_out.name)

            with open(jsonl_out.name, "r", encoding="utf-8") as f:
                records = [json.loads(line) for line in f]

            _, str_defs, frame_defs, agg_record, _ = jsonl_tables(records)
            paths = {str_defs[item["path_str_id"]] for item in frame_defs}
            funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
            return paths, funcs, agg_record["samples_total"]

        paths, funcs, samples_total = export_summary(skip_idle=True)
        self.assertEqual(paths, {"active1.py", "active2.py"})
        self.assertEqual(funcs, {"active_func1", "active_func2"})
        self.assertEqual(samples_total, 2)

        paths, funcs, samples_total = export_summary(skip_idle=False)
        self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"})
        self.assertEqual(funcs, {"active_func1", "idle_func", "active_func2"})
        self.assertEqual(samples_total, 3)
+
    def test_jsonl_collector_splits_large_exports_into_chunks(self):
        # 257 distinct frames -> 514 strings (file + func each), so the
        # 256-entry chunking yields [256, 256, 2] / [256, 1] / [256, 1].
        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
        self.addCleanup(close_and_unlink, jsonl_out)

        collector = JsonlCollector(1000)

        for i in range(257):
            collector.collect(
                [
                    MockInterpreterInfo(
                        0,
                        [
                            MockThreadInfo(
                                1,
                                [
                                    MockFrameInfo(
                                        f"file{i}.py", i + 1, f"func{i}"
                                    )
                                ],
                            )
                        ],
                    )
                ]
            )

        collector.export(jsonl_out.name)

        with open(jsonl_out.name, "r", encoding="utf-8") as f:
            records = [json.loads(line) for line in f]

        # All chunks of one run share the same UUID4-hex run_id.
        run_ids = {record["run_id"] for record in records}
        self.assertEqual(len(run_ids), 1)
        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")

        _, str_defs, frame_defs, agg_record, end_record = jsonl_tables(
            records
        )
        str_chunks = [
            record for record in records if record["type"] == "string_table"
        ]
        frame_chunks = [
            record for record in records if record["type"] == "frame_table"
        ]
        agg_chunks = [record for record in records if record["type"] == "agg"]

        self.assertEqual(
            [len(record["strings"]) for record in str_chunks],
            [256, 256, 2],
        )
        self.assertEqual(
            [len(record["frames"]) for record in frame_chunks], [256, 1]
        )
        self.assertEqual(
            [len(record["entries"]) for record in agg_chunks], [256, 1]
        )
        self.assertEqual(len(str_defs), 514)
        self.assertEqual(len(frame_defs), 257)
        self.assertEqual(agg_record["samples_total"], 257)
        self.assertEqual(end_record["samples_total"], 257)
+
+ def test_jsonl_collector_respects_weight_for_rle_batched_samples(self):
+ """weight>1 (from binary replay RLE) is honored in self/cumulative."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ leaf = MockFrameInfo("file.py", 10, "leaf")
+ non_leaf = MockFrameInfo("file.py", 20, "non_leaf")
+
+ collector.process_frames([leaf, non_leaf], _thread_id=1, weight=5)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, str_defs, frame_defs, agg, end = jsonl_tables(records)
+ self.assertEqual(end["samples_total"], 5)
+ self.assertEqual(agg["samples_total"], 5)
+ self.assertEqual(
+ {str_defs[fd["func_str_id"]]: fd["frame_id"] for fd in frame_defs},
+ {"leaf": 0, "non_leaf": 1},
+ )
+ self.assertEqual(agg["entries"], [
+ {"frame_id": 0, "self": 5, "cumulative": 5},
+ {"frame_id": 1, "self": 0, "cumulative": 5},
+ ])
+
+ def test_jsonl_collector_recursion_with_weight(self):
+ """Recursion dedup respects weight, not occurrence count."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ recursive = MockFrameInfo("rec.py", 10, "f")
+
+ collector.process_frames([recursive] * 3, _thread_id=1, weight=3)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, _, frame_defs, agg, _ = jsonl_tables(records)
+ self.assertEqual(len(frame_defs), 1)
+ self.assertEqual(agg["entries"], [
+ {"frame_id": 0, "self": 3, "cumulative": 3},
+ ])
+
+ def test_jsonl_collector_emits_col_and_end_col_when_present(self):
+ """All four location fields are emitted when col/end_col are >= 0."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ frame = MockFrameInfo("test.py", 0, "f")
+ frame.location = LocationInfo(42, 45, 4, 12)
+ frames = [
+ MockInterpreterInfo(
+ 0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+ )
+ ]
+ collector.collect(frames)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+ self.assertEqual(frame_defs, [
+ {
+ "frame_id": 0,
+ "path_str_id": 1,
+ "func_str_id": 0,
+ "line": 42,
+ "end_line": 45,
+ "col": 4,
+ "end_col": 12,
+ },
+ ])
+ self.assertEqual(str_defs, {0: "f", 1: "test.py"})
+
+ def test_jsonl_collector_partial_location_elision(self):
+ """Negative col/end_col/end_line fields are individually elided."""
+ # _get_or_create_frame_id interns funcname before filename, so
+ # func_str_id=0 ("f") and path_str_id=1 ("test.py").
+ common = {"frame_id": 0, "path_str_id": 1, "func_str_id": 0}
+ cases = [
+ (LocationInfo(42, 45, -1, 12),
+ {**common, "line": 42, "end_line": 45, "end_col": 12}),
+ (LocationInfo(42, 45, 4, -1),
+ {**common, "line": 42, "end_line": 45, "col": 4}),
+ (LocationInfo(42, 0, 4, 8),
+ {**common, "line": 42, "col": 4, "end_col": 8}),
+ ]
+ for loc, expected_frame_def in cases:
+ with self.subTest(location=loc):
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(1000)
+ frame = MockFrameInfo("test.py", 0, "f")
+ frame.location = loc
+ frames = [
+ MockInterpreterInfo(
+ 0,
+ [MockThreadInfo(1, [frame],
status=THREAD_STATUS_HAS_GIL)],
+ )
+ ]
+ collector.collect(frames)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, _, frame_defs, _, _ = jsonl_tables(records)
+ self.assertEqual(frame_defs, [expected_frame_def])
+
class TestRecursiveFunctionHandling(unittest.TestCase):
"""Tests for correct handling of recursive functions in cumulative
stats."""
@@ -1878,6 +2266,20 @@ def test_extract_lineno_from_none(self):
"""Test extracting lineno from None (synthetic frames)."""
self.assertEqual(extract_lineno(None), 0)
+ def test_extract_lineno_from_int(self):
+ """Test extracting lineno from a bare integer line number.
+
+ Mirrors normalize_location's int contract so callers like the
+ collapsed/flamegraph collectors do not crash on a bare-int location.
+ """
+ self.assertEqual(extract_lineno(42), 42)
+ self.assertEqual(extract_lineno(0), 0)
+
+ def test_normalize_location_with_int(self):
+ """Test normalize_location expands a legacy integer line number."""
+ result = normalize_location(42)
+ self.assertEqual(result, (42, 42, -1, -1))
+
def test_normalize_location_with_location_info(self):
"""Test normalize_location passes through LocationInfo."""
loc = LocationInfo(10, 15, 0, 5)
@@ -2068,6 +2470,85 @@ def test_gecko_collector_with_location_info(self):
# Verify function name is in string table
self.assertIn("handle_request", string_array)
+ def test_jsonl_collector_with_location_info(self):
+ """Test JsonlCollector handles LocationInfo properly."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(sample_interval_usec=1000)
+
+ # Frame with LocationInfo
+ frame = MockFrameInfo("test.py", 42, "my_function")
+ frames = [
+ MockInterpreterInfo(
+ 0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+ )
+ ]
+ collector.collect(frames)
+
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ meta, str_defs, frame_defs, agg, end = jsonl_tables(records)
+ self.assertEqual(meta["sample_interval_usec"], 1000)
+ self.assertEqual(agg["samples_total"], 1)
+ self.assertEqual(end["samples_total"], 1)
+ self.assertEqual(len(frame_defs), 1)
+ self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "test.py")
+ self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "my_function")
+ self.assertEqual(
+ frame_defs[0],
+ {
+ "frame_id": 0,
+ "path_str_id": frame_defs[0]["path_str_id"],
+ "func_str_id": frame_defs[0]["func_str_id"],
+ "line": 42,
+ "end_line": 42,
+ },
+ )
+
+ def test_jsonl_collector_with_none_location(self):
+ """Test JsonlCollector handles None location (synthetic frames)."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ collector = JsonlCollector(sample_interval_usec=1000)
+
+ # Create frame with None location (like GC frame)
+ frame = MockFrameInfo("~", 0, "<GC>")
+ frame.location = None # Synthetic frame has no location
+ frames = [
+ MockInterpreterInfo(
+ 0,
+ [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+ )
+ ]
+ collector.collect(frames)
+
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ meta, str_defs, frame_defs, agg, end = jsonl_tables(records)
+ self.assertEqual(meta["sample_interval_usec"], 1000)
+ self.assertEqual(agg["samples_total"], 1)
+ self.assertEqual(end["samples_total"], 1)
+ self.assertEqual(len(frame_defs), 1)
+ self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "~")
+ self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "<GC>")
+ self.assertEqual(
+ frame_defs[0],
+ {
+ "frame_id": 0,
+ "path_str_id": frame_defs[0]["path_str_id"],
+ "func_str_id": frame_defs[0]["func_str_id"],
+ "line": 0,
+ },
+ )
+
class TestOpcodeHandling(unittest.TestCase):
"""Tests for opcode field handling in collectors."""
@@ -2288,6 +2769,28 @@ def test_gecko_collector_frame_format(self):
# Should have recorded 3 functions
self.assertEqual(thread["funcTable"]["length"], 3)
+ def test_jsonl_collector_frame_format(self):
+ """Test JsonlCollector with 4-element frame format."""
+ collector = JsonlCollector(sample_interval_usec=1000)
+ collector.collect(self._make_sample_frames())
+
+ with tempfile.NamedTemporaryFile(delete=False) as f:
+ self.addClassCleanup(close_and_unlink, f)
+ collector.export(f.name)
+
+ with open(f.name, "r", encoding="utf-8") as fp:
+ records = [json.loads(line) for line in fp]
+
+ _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+
+ self.assertEqual(len(frame_defs), 3)
+
+ paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+ funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
+
+ self.assertEqual(paths, {"app.py", "utils.py", "lib.py"})
+ self.assertEqual(funcs, {"main", "helper", "process"})
+
class TestInternalFrameFiltering(unittest.TestCase):
"""Tests for filtering internal profiler frames from output."""
@@ -2415,3 +2918,42 @@ def
test_collapsed_stack_collector_filters_internal_frames(self):
for (call_tree, _), _ in collector.stack_counter.items():
for filename, _, _ in call_tree:
self.assertNotIn("_sync_coordinator", filename)
+
+ def test_jsonl_collector_filters_internal_frames(self):
+ """Test that JsonlCollector filters out internal frames."""
+ jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+ self.addCleanup(close_and_unlink, jsonl_out)
+
+ frames = [
+ MockInterpreterInfo(
+ 0,
+ [
+ MockThreadInfo(
+ 1,
+ [
+ MockFrameInfo("app.py", 50, "run"),
+ MockFrameInfo("/lib/_sync_coordinator.py", 100,
"main"),
+ MockFrameInfo("<frozen runpy>", 87, "_run_code"),
+ ],
+ status=THREAD_STATUS_HAS_GIL,
+ )
+ ],
+ )
+ ]
+
+ collector = JsonlCollector(sample_interval_usec=1000)
+ collector.collect(frames)
+ collector.export(jsonl_out.name)
+
+ with open(jsonl_out.name, "r", encoding="utf-8") as f:
+ records = [json.loads(line) for line in f]
+
+ _, str_defs, frame_defs, _, _ = jsonl_tables(records)
+
+ paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+
+ self.assertIn("app.py", paths)
+ self.assertIn("<frozen runpy>", paths)
+
+ for path in paths:
+ self.assertNotIn("_sync_coordinator", path)
diff --git
a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
new file mode 100644
index 00000000000000..636f45ae8d6c70
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -0,0 +1,4 @@
+The ``profiling.sampling`` module now supports JSONL output format via
+``--jsonl``. Each run emits a newline-delimited JSON file that is
+sequentially parseable by external tools, scripts, and programmatic
+consumers. Patch by Maurycy Pawłowski-Wieroński.
diff --git a/Modules/_remote_debugging/binary_io_reader.c
b/Modules/_remote_debugging/binary_io_reader.c
index 3ec4e0c77964c8..da3e7d55309c27 100644
--- a/Modules/_remote_debugging/binary_io_reader.c
+++ b/Modules/_remote_debugging/binary_io_reader.c
@@ -781,9 +781,9 @@ build_frame_list(RemoteDebuggingState *state, BinaryReader
*reader,
if (frame->lineno != LOCATION_NOT_AVAILABLE) {
location = Py_BuildValue("(iiii)",
frame->lineno,
- frame->end_lineno != LOCATION_NOT_AVAILABLE ?
frame->end_lineno : frame->lineno,
- frame->column != LOCATION_NOT_AVAILABLE ? frame->column : 0,
- frame->end_column != LOCATION_NOT_AVAILABLE ?
frame->end_column : 0);
+ frame->end_lineno,
+ frame->column,
+ frame->end_column);
if (!location) {
Py_DECREF(frame_info);
goto error;
_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]