From 2749a44d99576c1dffe9eeb2467ce9a8cfaaf953 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sat, 21 Mar 2026 16:57:18 +0100
Subject: [PATCH 01/38] Add NDJSON collector and --ndjson output format to the sampling profiler (first stab)

---
 Lib/profiling/sampling/__init__.py         |  11 +-
 Lib/profiling/sampling/binary_reader.py    |   3 +
 Lib/profiling/sampling/cli.py              |  25 ++-
 Lib/profiling/sampling/ndjson_collector.py | 216 +++++++++++++++++++++
 4 files changed, 250 insertions(+), 5 deletions(-)
 create mode 100644 Lib/profiling/sampling/ndjson_collector.py

diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py
index 6a0bb5e5c2f387..21d3a773a2ba63 100644
--- a/Lib/profiling/sampling/__init__.py
+++ b/Lib/profiling/sampling/__init__.py
@@ -9,6 +9,15 @@
 from .stack_collector import CollapsedStackCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
+from .ndjson_collector import NdjsonCollector
 from .string_table import StringTable
 
-__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable")
+__all__ = (
+    "Collector",
+    "PstatsCollector",
+    "CollapsedStackCollector",
+    "HeatmapCollector",
+    "GeckoCollector",
+    "NdjsonCollector",
+    "StringTable",
+)
diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py
index a11be3652597a6..d5bfc0d6130f1a 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -4,6 +4,7 @@
 
 from .gecko_collector import GeckoCollector
 from .stack_collector import FlamegraphCollector, CollapsedStackCollector
+from .ndjson_collector import NdjsonCollector
 from .pstats_collector import PstatsCollector
 
 
@@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format,
             collector = PstatsCollector(interval)
         elif output_format == 'gecko':
             collector = GeckoCollector(interval)
+        elif output_format == 'ndjson':
+            collector = NdjsonCollector(interval)
         else:
             raise ValueError(f"Unknown output format: {output_format}")
 
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index f4b31aad45b922..4f9e784f80495d 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -19,6 +19,7 @@
 from .stack_collector import CollapsedStackCollector, FlamegraphCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
+from .ndjson_collector import NdjsonCollector
 from .binary_collector import BinaryCollector
 from .binary_reader import BinaryReader
 from .constants import (
@@ -87,6 +88,7 @@ class CustomFormatter(
     "flamegraph": "html",
     "gecko": "json",
     "heatmap": "html",
+    "ndjson": "ndjson",
     "binary": "bin",
 }
 
@@ -96,6 +98,7 @@ class CustomFormatter(
     "flamegraph": FlamegraphCollector,
     "gecko": GeckoCollector,
     "heatmap": HeatmapCollector,
+    "ndjson": NdjsonCollector,
     "binary": BinaryCollector,
 }
 
@@ -467,6 +470,13 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
         dest="format",
         help="Generate interactive HTML heatmap visualization with line-level sample counts",
     )
+    format_group.add_argument(
+        "--ndjson",
+        action="store_const",
+        const="ndjson",
+        dest="format",
+        help="Generate NDJSON snapshot output for external consumers",
+    )
     if include_binary:
         format_group.add_argument(
             "--binary",
@@ -545,15 +555,17 @@ def _sort_to_mode(sort_choice):
     return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
 
 def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False,
-                      output_file=None, compression='auto'):
+                      mode=None, output_file=None, compression='auto'):
     """Create the appropriate collector based on format type.
 
     Args:
-        format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary')
+        format_type: The output format ('pstats', 'collapsed', 'flamegraph',
+                    'gecko', 'heatmap', 'ndjson', 'binary')
         sample_interval_usec: Sampling interval in microseconds
         skip_idle: Whether to skip idle samples
         opcodes: Whether to collect opcode information (only used by gecko format
                  for creating interval markers in Firefox Profiler)
+        mode: Profiling mode for collectors that expose it in metadata
         output_file: Output file path (required for binary format)
         compression: Compression type for binary format ('auto', 'zstd', 'none')
 
@@ -577,6 +589,11 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
         skip_idle = False
         return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)
 
+    if format_type == "ndjson":
+        return collector_class(
+            sample_interval_usec, skip_idle=skip_idle, mode=mode
+        )
+
     return collector_class(sample_interval_usec, skip_idle=skip_idle)
 
 
@@ -951,7 +968,7 @@ def _handle_attach(args):
 
     # Create the appropriate collector
     collector = _create_collector(
-        args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+        args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
         output_file=output_file,
         compression=getattr(args, 'compression', 'auto')
     )
@@ -1029,7 +1046,7 @@ def _handle_run(args):
 
     # Create the appropriate collector
     collector = _create_collector(
-        args.format, args.sample_interval_usec, skip_idle, args.opcodes,
+        args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
         output_file=output_file,
         compression=getattr(args, 'compression', 'auto')
     )
diff --git a/Lib/profiling/sampling/ndjson_collector.py b/Lib/profiling/sampling/ndjson_collector.py
new file mode 100644
index 00000000000000..123ec1c5ea9a1c
--- /dev/null
+++ b/Lib/profiling/sampling/ndjson_collector.py
@@ -0,0 +1,216 @@
+"""NDJSON collector."""
+
+import json
+import uuid
+from itertools import batched
+
+from .constants import (
+    PROFILING_MODE_ALL,
+    PROFILING_MODE_CPU,
+    PROFILING_MODE_EXCEPTION,
+    PROFILING_MODE_GIL,
+    PROFILING_MODE_WALL,
+)
+from .stack_collector import StackTraceCollector
+
+
+_CHUNK_SIZE = 1000
+
+_MODE_NAMES = {
+    PROFILING_MODE_WALL: "wall",
+    PROFILING_MODE_CPU: "cpu",
+    PROFILING_MODE_GIL: "gil",
+    PROFILING_MODE_ALL: "all",
+    PROFILING_MODE_EXCEPTION: "exception",
+}
+
+
+class NdjsonCollector(StackTraceCollector):
+    """Collector that exports finalized profiling data as NDJSON."""
+
+    def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
+        super().__init__(sample_interval_usec, skip_idle=skip_idle)
+        self.run_id = uuid.uuid4().hex
+
+        self._string_to_id = {}
+        self._strings = []
+
+        self._frame_to_id = {}
+        self._frames = []
+
+        self._frame_self = {}
+        self._frame_cumulative = {}
+        self._samples_total = 0
+
+        self._mode = mode
+
+    def process_frames(self, frames, _thread_id, weight=1):
+        if not frames:
+            return
+
+        self._samples_total += weight
+
+        frame_ids = [
+            self._get_or_create_frame_id(filename, location, funcname)
+            for filename, location, funcname, _opcode in frames
+        ]
+        leaf_frame_id = frame_ids[0]
+
+        self._frame_self[leaf_frame_id] = (
+            self._frame_self.get(leaf_frame_id, 0) + weight
+        )
+
+        for frame_id in set(frame_ids):
+            self._frame_cumulative[frame_id] = (
+                self._frame_cumulative.get(frame_id, 0) + weight
+            )
+
+    def export(self, filename):
+        with open(filename, "w", encoding="utf-8") as output:
+            self._write_message(output, self._build_meta_record())
+            self._write_chunked_defs(output, "str_def", self._strings)
+            self._write_chunked_defs(output, "frame_def", self._frames)
+            self._write_chunked_agg(output, self._iter_agg_entries())
+            self._write_message(
+                output,
+                {
+                    "type": "end",
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "samples_total": self._samples_total,
+                },
+            )
+
+        print(f"NDJSON profile written to {filename}")
+
+    def _build_meta_record(self):
+        record = {
+            "type": "meta",
+            "v": 1,
+            "run_id": self.run_id,
+            "sample_interval_usec": self.sample_interval_usec,
+        }
+
+        if self._mode is not None:
+            record["mode"] = _MODE_NAMES.get(self._mode, str(self._mode))
+
+        return record
+
+    def _get_or_create_frame_id(self, filename, location, funcname):
+        synthetic = location is None
+        location_fields = self._normalize_export_location(location)
+        func_str_id = self._intern_string(funcname)
+        path_str_id = self._intern_string(filename)
+
+        frame_key = (
+            path_str_id,
+            func_str_id,
+            location_fields["line"],
+            location_fields.get("end_line"),
+            location_fields.get("col"),
+            location_fields.get("end_col"),
+            synthetic,
+        )
+
+        if (frame_id := self._frame_to_id.get(frame_key)) is not None:
+            return frame_id
+
+        frame_id = len(self._frames) + 1
+        frame_record = {
+            "frame_id": frame_id,
+            "path_str_id": path_str_id,
+            "func_str_id": func_str_id,
+            **location_fields,
+        }
+        if synthetic:
+            frame_record["synthetic"] = True
+
+        self._frame_to_id[frame_key] = frame_id
+        self._frames.append(frame_record)
+        return frame_id
+
+    def _intern_string(self, value):
+        value = str(value)
+
+        if (string_id := self._string_to_id.get(value)) is not None:
+            return string_id
+
+        string_id = len(self._strings) + 1
+        self._string_to_id[value] = string_id
+        self._strings.append({"str_id": string_id, "value": value})
+        return string_id
+
+    @staticmethod
+    def _normalize_export_location(location):
+        if location is None:
+            return {"line": 0}
+
+        if isinstance(location, int):
+            return {"line": max(location, 0)}
+
+        if not isinstance(location, tuple):
+            lineno = getattr(location, "lineno", 0)
+            location = (
+                lineno,
+                getattr(location, "end_lineno", lineno),
+                getattr(location, "col_offset", -1),
+                getattr(location, "end_col_offset", -1),
+            )
+
+        lineno, end_lineno, col_offset, end_col_offset = location
+        if not isinstance(lineno, int) or lineno <= 0:
+            return {"line": 0}
+
+        normalized = {"line": lineno}
+        if isinstance(end_lineno, int) and end_lineno > 0:
+            normalized["end_line"] = end_lineno
+        if isinstance(col_offset, int) and col_offset >= 0:
+            normalized["col"] = col_offset
+        if isinstance(end_col_offset, int) and end_col_offset >= 0:
+            normalized["end_col"] = end_col_offset
+        return normalized
+
+    def _iter_agg_entries(self):
+        entries = []
+        for frame_record in self._frames:
+            frame_id = frame_record["frame_id"]
+            entries.append(
+                {
+                    "frame_id": frame_id,
+                    "self": self._frame_self.get(frame_id, 0),
+                    "cumulative": self._frame_cumulative.get(frame_id, 0),
+                }
+            )
+        return entries
+
+    def _write_chunked_defs(self, output, record_type, entries):
+        for chunk in batched(entries, _CHUNK_SIZE):
+            self._write_message(
+                output,
+                {
+                    "type": record_type,
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "defs": chunk,
+                },
+            )
+
+    def _write_chunked_agg(self, output, entries):
+        for chunk in batched(entries, _CHUNK_SIZE):
+            self._write_message(
+                output,
+                {
+                    "type": "agg",
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "kind": "frame",
+                    "scope": "final",
+                    "samples_total": self._samples_total,
+                    "entries": chunk,
+                },
+            )
+
+    @staticmethod
+    def _write_message(output, record):
+        output.write(json.dumps(record, separators=(",", ":")))
+        output.write("\n")

From f13d34c02b4b3a3e507e8863253c8f3c672484e9 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sat, 21 Mar 2026 21:08:18 +0100
Subject: [PATCH 02/38] s/ndjson/jsonl/

---
 Lib/profiling/sampling/__init__.py               |  4 ++--
 Lib/profiling/sampling/binary_reader.py          |  6 +++---
 Lib/profiling/sampling/cli.py                    | 16 ++++++++--------
 .../{ndjson_collector.py => jsonl_collector.py}  |  8 ++++----
 4 files changed, 17 insertions(+), 17 deletions(-)
 rename Lib/profiling/sampling/{ndjson_collector.py => jsonl_collector.py} (97%)

diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py
index 21d3a773a2ba63..71579a3903253e 100644
--- a/Lib/profiling/sampling/__init__.py
+++ b/Lib/profiling/sampling/__init__.py
@@ -9,7 +9,7 @@
 from .stack_collector import CollapsedStackCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
-from .ndjson_collector import NdjsonCollector
+from .jsonl_collector import JsonlCollector
 from .string_table import StringTable
 
 __all__ = (
@@ -18,6 +18,6 @@
     "CollapsedStackCollector",
     "HeatmapCollector",
     "GeckoCollector",
-    "NdjsonCollector",
+    "JsonlCollector",
     "StringTable",
 )
diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py
index d5bfc0d6130f1a..8d1d8eef9155eb 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -4,7 +4,7 @@
 
 from .gecko_collector import GeckoCollector
 from .stack_collector import FlamegraphCollector, CollapsedStackCollector
-from .ndjson_collector import NdjsonCollector
+from .jsonl_collector import JsonlCollector
 from .pstats_collector import PstatsCollector
 
 
@@ -118,8 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format,
             collector = PstatsCollector(interval)
         elif output_format == 'gecko':
             collector = GeckoCollector(interval)
-        elif output_format == 'ndjson':
-            collector = NdjsonCollector(interval)
+        elif output_format == 'jsonl':
+            collector = JsonlCollector(interval)
         else:
             raise ValueError(f"Unknown output format: {output_format}")
 
diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py
index 4f9e784f80495d..bb97c9729364cc 100644
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@@ -19,7 +19,7 @@
 from .stack_collector import CollapsedStackCollector, FlamegraphCollector
 from .heatmap_collector import HeatmapCollector
 from .gecko_collector import GeckoCollector
-from .ndjson_collector import NdjsonCollector
+from .jsonl_collector import JsonlCollector
 from .binary_collector import BinaryCollector
 from .binary_reader import BinaryReader
 from .constants import (
@@ -88,7 +88,7 @@ class CustomFormatter(
     "flamegraph": "html",
     "gecko": "json",
     "heatmap": "html",
-    "ndjson": "ndjson",
+    "jsonl": "jsonl",
     "binary": "bin",
 }
 
@@ -98,7 +98,7 @@ class CustomFormatter(
     "flamegraph": FlamegraphCollector,
     "gecko": GeckoCollector,
     "heatmap": HeatmapCollector,
-    "ndjson": NdjsonCollector,
+    "jsonl": JsonlCollector,
     "binary": BinaryCollector,
 }
 
@@ -471,11 +471,11 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
         help="Generate interactive HTML heatmap visualization with line-level sample counts",
     )
     format_group.add_argument(
-        "--ndjson",
+        "--jsonl",
         action="store_const",
-        const="ndjson",
+        const="jsonl",
         dest="format",
-        help="Generate NDJSON snapshot output for external consumers",
+        help="Generate JSONL snapshot output for external consumers",
     )
     if include_binary:
         format_group.add_argument(
@@ -560,7 +560,7 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
 
     Args:
         format_type: The output format ('pstats', 'collapsed', 'flamegraph',
-                    'gecko', 'heatmap', 'ndjson', 'binary')
+                    'gecko', 'heatmap', 'jsonl', 'binary')
         sample_interval_usec: Sampling interval in microseconds
         skip_idle: Whether to skip idle samples
         opcodes: Whether to collect opcode information (only used by gecko format
@@ -589,7 +589,7 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
         skip_idle = False
         return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)
 
-    if format_type == "ndjson":
+    if format_type == "jsonl":
         return collector_class(
             sample_interval_usec, skip_idle=skip_idle, mode=mode
         )
diff --git a/Lib/profiling/sampling/ndjson_collector.py b/Lib/profiling/sampling/jsonl_collector.py
similarity index 97%
rename from Lib/profiling/sampling/ndjson_collector.py
rename to Lib/profiling/sampling/jsonl_collector.py
index 123ec1c5ea9a1c..1d6575425c2616 100644
--- a/Lib/profiling/sampling/ndjson_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -1,4 +1,4 @@
-"""NDJSON collector."""
+"""JSONL collector."""
 
 import json
 import uuid
@@ -25,8 +25,8 @@
 }
 
 
-class NdjsonCollector(StackTraceCollector):
-    """Collector that exports finalized profiling data as NDJSON."""
+class JsonlCollector(StackTraceCollector):
+    """Collector that exports finalized profiling data as JSONL."""
 
     def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         super().__init__(sample_interval_usec, skip_idle=skip_idle)
@@ -81,7 +81,7 @@ def export(self, filename):
                 },
             )
 
-        print(f"NDJSON profile written to {filename}")
+        print(f"JSONL profile written to {filename}")
 
     def _build_meta_record(self):
         record = {

From c15d318022cf9c226cd36b36818a270f994fb99c Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Sun, 22 Mar 2026 02:51:12 +0100
Subject: [PATCH 03/38] printing to stdout isn't a great idea

---
 Lib/profiling/sampling/jsonl_collector.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 1d6575425c2616..3333b7352c9411 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -81,8 +81,6 @@ def export(self, filename):
                 },
             )
 
-        print(f"JSONL profile written to {filename}")
-
     def _build_meta_record(self):
         record = {
             "type": "meta",

From cb27fc035d79a7611bd7583d82c02f92b4980a93 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Mon, 30 Mar 2026 22:50:59 +0200
Subject: [PATCH 04/38] Add a basic export test for JsonlCollector

---
 .../test_sampling_profiler/test_collectors.py | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 86fb9d4c05b3bc..66052a8b26f3c1 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -16,6 +16,7 @@
         CollapsedStackCollector,
         FlamegraphCollector,
     )
+    from profiling.sampling.jsonl_collector import JsonlCollector
     from profiling.sampling.gecko_collector import GeckoCollector
     from profiling.sampling.collector import extract_lineno, normalize_location
     from profiling.sampling.opcode_utils import get_opcode_info, format_opcode
@@ -1665,6 +1666,86 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff"], -1.0)
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
+    def test_jsonl_collector_basic(self):
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        collector = JsonlCollector(1000)
+        run_id = collector.run_id
+
+        self.assertIsNotNone(run_id)
+
+        test_frames1 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                    )
+                ],
+            )
+        ]
+        test_frames2 = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                    )
+                ],
+            )
+        ]  # Same stack
+        test_frames3 = [
+            MockInterpreterInfo(
+                0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])]
+            )
+        ]
+
+        collector.collect(test_frames1)
+        collector.collect(test_frames2)
+        collector.collect(test_frames3)
+
+        with captured_stdout(), captured_stderr():
+            collector.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "func1"},
+                            {"str_id": 2, "value": "file.py"},
+                            {"str_id": 3, "value": "func2"},
+                            {"str_id": 4, "value": "other_func"},
+                            {"str_id": 5, "value": "other.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 10, "end_line": 10},
+                            {"frame_id": 2, "path_str_id": 2, "func_str_id": 3,
+                             "line": 20, "end_line": 20},
+                            {"frame_id": 3, "path_str_id": 5, "func_str_id": 4,
+                             "line": 5, "end_line": 5}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "entries": [{"frame_id": 1, "self": 2, "cumulative": 2},
+                               {"frame_id": 2, "self": 0, "cumulative": 2},
+                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 3}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
     """Tests for correct handling of recursive functions in cumulative stats."""

From 59cbb4a9cb7d7a7fe018d0c22d9d2a23e5b67d1f Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Mon, 30 Mar 2026 22:54:22 +0200
Subject: [PATCH 05/38] separate func for end record

---
 Lib/profiling/sampling/jsonl_collector.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 3333b7352c9411..59ab3b865c182c 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -71,15 +71,7 @@ def export(self, filename):
             self._write_chunked_defs(output, "str_def", self._strings)
             self._write_chunked_defs(output, "frame_def", self._frames)
             self._write_chunked_agg(output, self._iter_agg_entries())
-            self._write_message(
-                output,
-                {
-                    "type": "end",
-                    "v": 1,
-                    "run_id": self.run_id,
-                    "samples_total": self._samples_total,
-                },
-            )
+            self._write_message(output, self._build_end_record())
 
     def _build_meta_record(self):
         record = {
@@ -94,6 +86,16 @@ def _build_meta_record(self):
 
         return record
 
+    def _build_end_record(self):
+        record = {
+            "type": "end",
+            "v": 1,
+            "run_id": self.run_id,
+            "samples_total": self._samples_total,
+        }
+
+        return record
+
     def _get_or_create_frame_id(self, filename, location, funcname):
         synthetic = location is None
         location_fields = self._normalize_export_location(location)

From 25c692207966798b82e50729caba58e9e0f4b708 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Mon, 30 Mar 2026 23:14:18 +0200
Subject: [PATCH 06/38] Rename test_jsonl_collector_basic to test_jsonl_collector_export

---
 .../test_profiling/test_sampling_profiler/test_collectors.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 66052a8b26f3c1..6127284618a13d 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1666,7 +1666,7 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff"], -1.0)
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
-    def test_jsonl_collector_basic(self):
+    def test_jsonl_collector_export(self):
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, collapsed_out)
 

From 67cd39a0a07e3e74ab025ad2b39858f2d3bef275 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 09:48:26 +0200
Subject: [PATCH 07/38] Add test_jsonl_collector_with_location_info

---
 .../test_sampling_profiler/test_collectors.py | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 6127284618a13d..175d2f7c263809 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2145,6 +2145,56 @@ def test_gecko_collector_with_location_info(self):
         # Verify function name is in string table
         self.assertIn("handle_request", string_array)
 
+    def test_jsonl_collector_with_location_info(self):
+        """Test JsonlCollector handles LocationInfo properly."""
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        collector = JsonlCollector(sample_interval_usec=1000)
+        run_id = collector.run_id
+
+        # Frame with LocationInfo
+        frame = MockFrameInfo("test.py", 42, "my_function")
+        frames = [
+            MockInterpreterInfo(
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+            )
+        ]
+        collector.collect(frames)
+
+        # Should extract lineno from location
+        with captured_stdout(), captured_stderr():
+            collector.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "my_function"},
+                            {"str_id": 2, "value": "test.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 42, "end_line": 42}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 1,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 1}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
 
 class TestOpcodeHandling(unittest.TestCase):
     """Tests for opcode field handling in collectors."""

From 7c85d474ffd94cb654ab3ebe68f631ecd75faeb5 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 09:52:15 +0200
Subject: [PATCH 08/38] Add JsonlCollector test for synthetic frames (None location)

---
 .../test_sampling_profiler/test_collectors.py | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 175d2f7c263809..d04b8a49871f48 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2145,6 +2145,7 @@ def test_gecko_collector_with_location_info(self):
         # Verify function name is in string table
         self.assertIn("handle_request", string_array)
 
+
     def test_jsonl_collector_with_location_info(self):
         """Test JsonlCollector handles LocationInfo properly."""
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)
@@ -2196,6 +2197,59 @@ def jsonl(obj):
             self.assertIn(exp, lines)
 
 
+    def test_jsonl_collector_with_none_location(self):
+        """Test JsonlCollector handles None location (synthetic frames)."""
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        collector = JsonlCollector(sample_interval_usec=1000)
+        run_id = collector.run_id
+
+        # Create frame with None location (like GC frame)
+        frame = MockFrameInfo("~", 0, "<GC>")
+        frame.location = None  # Synthetic frame has no location
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+            )
+        ]
+        collector.collect(frames)
+
+        # Should handle None location as synthetic frame
+        with captured_stdout(), captured_stderr():
+            collector.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "<GC>"},
+                            {"str_id": 2, "value": "~"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 0, "synthetic": True}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 1,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 1}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
+
 class TestOpcodeHandling(unittest.TestCase):
     """Tests for opcode field handling in collectors."""
 

From 3eddae83d550962779f11225c3a55f3bd52f37de Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 10:02:08 +0200
Subject: [PATCH 09/38] too many new lines

---
 .../test_profiling/test_sampling_profiler/test_collectors.py    | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index d04b8a49871f48..f46cfc1dbcd0b1 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2145,7 +2145,6 @@ def test_gecko_collector_with_location_info(self):
         # Verify function name is in string table
         self.assertIn("handle_request", string_array)
 
-
     def test_jsonl_collector_with_location_info(self):
         """Test JsonlCollector handles LocationInfo properly."""
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)
@@ -2196,7 +2195,6 @@ def jsonl(obj):
         for exp in expected:
             self.assertIn(exp, lines)
 
-
     def test_jsonl_collector_with_none_location(self):
         """Test JsonlCollector handles None location (synthetic frames)."""
         collapsed_out = tempfile.NamedTemporaryFile(delete=False)

From f71252ed7b63a7ce65ef83caf4d7f73ca4899e28 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 10:21:42 +0200
Subject: [PATCH 10/38] BUG? confusing... two ways to set skip_idle?

---
 .../test_sampling_profiler/test_modes.py      | 157 +++++++++++++++++-
 1 file changed, 155 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 0b38fb4ad4bcf6..67b82eff091d08 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -9,6 +9,7 @@
     import profiling.sampling
     import profiling.sampling.sample
     from profiling.sampling.pstats_collector import PstatsCollector
+    from profiling.sampling.jsonl_collector import JsonlCollector
     from profiling.sampling.cli import main, _parse_mode
     from profiling.sampling.constants import PROFILING_MODE_EXCEPTION
     from _remote_debugging import (
@@ -20,9 +21,13 @@
         "Test only runs when _remote_debugging is available"
     )
 
-from test.support import requires_remote_subprocess_debugging
+from test.support import (
+    captured_stdout,
+    captured_stderr,
+    requires_remote_subprocess_debugging,
+)
 
-from .helpers import test_subprocess
+from .helpers import close_and_unlink, test_subprocess
 from .mocks import MockFrameInfo, MockInterpreterInfo
 
 
@@ -228,6 +233,154 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
+    def test_jsonl_collector_rspects_skip_idle(self):
+        """Test that frames are actually filtered when skip_idle=True."""
+        import tempfile
+        import json
+
+        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, collapsed_out)
+
+        # Create mock frames with different thread statuses
+        class MockThreadInfoWithStatus:
+            def __init__(self, thread_id, frame_info, status):
+                self.thread_id = thread_id
+                self.frame_info = frame_info
+                self.status = status
+
+        # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
+        ACTIVE_STATUS = (
+            THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
+        )  # Has GIL and on CPU
+        IDLE_STATUS = 0  # Neither has GIL nor on CPU
+
+        test_frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfoWithStatus(
+                        1,
+                        [MockFrameInfo("active1.py", 10, "active_func1")],
+                        ACTIVE_STATUS,
+                    ),
+                    MockThreadInfoWithStatus(
+                        2,
+                        [MockFrameInfo("idle.py", 20, "idle_func")],
+                        IDLE_STATUS,
+                    ),
+                    MockThreadInfoWithStatus(
+                        3,
+                        [MockFrameInfo("active2.py", 30, "active_func2")],
+                        ACTIVE_STATUS,
+                    ),
+                ],
+            )
+        ]
+
+        # Test with skip_idle=True - should only process running threads
+        collector_skip = JsonlCollector(
+            sample_interval_usec=1000, skip_idle=True
+        )
+        collector_skip.collect(test_frames)
+
+        run_id = collector_skip.run_id
+
+        # Should only have functions from running threads (status 0)
+        with captured_stdout(), captured_stderr():
+            collector_skip.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        def jsonl(obj):
+            return json.dumps(obj, separators=(",", ":"))
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "active_func1"},
+                            {"str_id": 2, "value": "active1.py"},
+                            {"str_id": 3, "value": "idle_func"},
+                            {"str_id": 4, "value": "idle.py"},
+                            {"str_id": 5, "value": "active_func2"},
+                            {"str_id": 6, "value": "active2.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 10, "end_line": 10},
+                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
+                             "line": 20, "end_line": 20},
+                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
+                             "line": 30, "end_line": 30}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
+                               {"frame_id": 2, "self": 1, "cumulative": 1},
+                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 3}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
+        # Test with skip_idle=False - should process all threads
+        collector_no_skip = JsonlCollector(
+            sample_interval_usec=1000, skip_idle=False
+        )
+        collector_no_skip.collect(test_frames)
+
+        run_id = collector_no_skip.run_id
+
+        # Should have functions from all threads
+        with captured_stdout(), captured_stderr():
+            collector_no_skip.export(collapsed_out.name)
+
+        # Check file contents
+        with open(collapsed_out.name, "r") as f:
+            content = f.read()
+
+        lines = content.strip().split("\n")
+        self.assertEqual(len(lines), 5)
+
+        expected = [
+            jsonl({"type": "meta", "v": 1, "run_id": run_id,
+                   "sample_interval_usec": 1000}),
+            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
+                   "defs": [{"str_id": 1, "value": "active_func1"},
+                            {"str_id": 2, "value": "active1.py"},
+                            {"str_id": 3, "value": "idle_func"},
+                            {"str_id": 4, "value": "idle.py"},
+                            {"str_id": 5, "value": "active_func2"},
+                            {"str_id": 6, "value": "active2.py"}]}),
+            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
+                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
+                             "line": 10, "end_line": 10},
+                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
+                             "line": 20, "end_line": 20},
+                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
+                             "line": 30, "end_line": 30}]}),
+            jsonl({"type": "agg", "v": 1, "run_id": run_id,
+                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
+                               {"frame_id": 2, "self": 1, "cumulative": 1},
+                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+            jsonl({"type": "end", "v": 1, "run_id": run_id,
+                   "samples_total": 3}),
+        ]
+
+        for exp in expected:
+            self.assertIn(exp, lines)
+
+        # self.assertIn(active1_key, collector_no_skip.result)
+        # self.assertIn(active2_key, collector_no_skip.result)
+        # self.assertIn(
+        #     idle_key, collector_no_skip.result
+        # )  # Idle thread should be included
 
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):

From c183109f8a3a22153607e4cdf618d2dcc1b62c78 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 12:51:16 +0200
Subject: [PATCH 11/38] expect idle thread to be filtered with skip_idle (thx b4fac15613a16f9cd7b2ee32840523b399f4621f)

---
 .../test_sampling_profiler/test_modes.py          | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 67b82eff091d08..9d792b8d6f20ab 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -305,24 +305,19 @@ def jsonl(obj):
             jsonl({"type": "str_def", "v": 1, "run_id": run_id,
                    "defs": [{"str_id": 1, "value": "active_func1"},
                             {"str_id": 2, "value": "active1.py"},
-                            {"str_id": 3, "value": "idle_func"},
-                            {"str_id": 4, "value": "idle.py"},
-                            {"str_id": 5, "value": "active_func2"},
-                            {"str_id": 6, "value": "active2.py"}]}),
+                            {"str_id": 3, "value": "active_func2"},
+                            {"str_id": 4, "value": "active2.py"}]}),
             jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
                    "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
                              "line": 10, "end_line": 10},
                             {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
-                             "line": 20, "end_line": 20},
-                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
                              "line": 30, "end_line": 30}]}),
             jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 3,
+                   "kind": "frame", "scope": "final", "samples_total": 2,
                    "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
-                               {"frame_id": 2, "self": 1, "cumulative": 1},
-                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
+                               {"frame_id": 2, "self": 1, "cumulative": 1}]}),
             jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 3}),
+                   "samples_total": 2}),
         ]
 
         for exp in expected:

From f20eb52efbb6b9a14c868affb25944d229b26cb7 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:16:36 +0200
Subject: [PATCH 12/38] check if it works fine with (file, loc, func, op)

---
 .../test_sampling_profiler/test_collectors.py | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index f46cfc1dbcd0b1..dcf0b09828a790 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2467,6 +2467,38 @@ def test_gecko_collector_frame_format(self):
         # Should have recorded 3 functions
         self.assertEqual(thread["funcTable"]["length"], 3)
 
+    def test_jsonl_collector_frame_format(self):
+        """Test JsonlCollector with 4-element frame format."""
+        collector = JsonlCollector(sample_interval_usec=1000)
+        collector.collect(self._make_sample_frames())
+
+        with tempfile.NamedTemporaryFile(delete=False) as f:
+            self.addClassCleanup(close_and_unlink, f)
+            collector.export(f.name)
+
+        with open(f.name, "r", encoding="utf-8") as fp:
+            records = [json.loads(line) for line in fp]
+
+        str_defs = {
+            item["str_id"]: item["value"]
+            for record in records
+            if record["type"] == "str_def"
+            for item in record["defs"]
+        }
+        frame_defs = [
+            item
+            for record in records
+            if record["type"] == "frame_def"
+            for item in record["defs"]
+        ]
+
+        self.assertEqual(len(frame_defs), 3)
+
+        paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+        funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
+
+        self.assertEqual(paths, {"app.py", "utils.py", "lib.py"})
+        self.assertEqual(funcs, {"main", "helper", "process"})
 
 class TestInternalFrameFiltering(unittest.TestCase):
     """Tests for filtering internal profiler frames from output."""

From 546ce90a007f30469e2c9f9a83240ba2a4d05e9d Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:19:44 +0200
Subject: [PATCH 13/38] missing new line

---
 .../test_profiling/test_sampling_profiler/test_collectors.py     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index dcf0b09828a790..084c3c549f99d0 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2500,6 +2500,7 @@ def test_jsonl_collector_frame_format(self):
         self.assertEqual(paths, {"app.py", "utils.py", "lib.py"})
         self.assertEqual(funcs, {"main", "helper", "process"})
 
+
 class TestInternalFrameFiltering(unittest.TestCase):
     """Tests for filtering internal profiler frames from output."""
 

From 350ad99bc7d3c4c9170be60b447c7329c2694654 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:25:20 +0200
Subject: [PATCH 14/38] filter out sync coordinator

---
 .../test_sampling_profiler/test_collectors.py | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 084c3c549f99d0..d80288c6ec959c 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2627,3 +2627,54 @@ def test_collapsed_stack_collector_filters_internal_frames(self):
         for (call_tree, _), _ in collector.stack_counter.items():
             for filename, _, _ in call_tree:
                 self.assertNotIn("_sync_coordinator", filename)
+
+    def test_jsonl_collector_filters_internal_frames(self):
+        """Test that JsonlCollector filters out internal frames."""
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(sample_interval_usec=1000)
+
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [
+                            MockFrameInfo("app.py", 50, "run"),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo("<frozen runpy>", 87, "_run_code"),
+                        ],
+                        status=THREAD_STATUS_HAS_GIL,
+                    )
+                ],
+            )
+        ]
+
+        collector.collect(frames)
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        str_defs = {
+            item["str_id"]: item["value"]
+            for record in records
+            if record["type"] == "str_def"
+            for item in record["defs"]
+        }
+        frame_defs = [
+            item
+            for record in records
+            if record["type"] == "frame_def"
+            for item in record["defs"]
+        ]
+
+        paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+
+        self.assertIn("app.py", paths)
+        self.assertIn("<frozen runpy>", paths)
+
+        for path in paths:
+            self.assertNotIn("_sync_coordinator", path)

From 942d821da00c9d3b879ea0608b071fec9aba6236 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:30:12 +0200
Subject: [PATCH 15/38] s/collapsed_out/jsonl_out/, less copying :D

---
 .../test_sampling_profiler/test_collectors.py | 24 +++++++++----------
 .../test_sampling_profiler/test_modes.py      | 12 +++++-----
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index d80288c6ec959c..8432f2ac6de398 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1667,8 +1667,8 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
     def test_jsonl_collector_export(self):
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(1000)
         run_id = collector.run_id
@@ -1706,10 +1706,10 @@ def test_jsonl_collector_export(self):
         collector.collect(test_frames3)
 
         with captured_stdout(), captured_stderr():
-            collector.export(collapsed_out.name)
+            collector.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
@@ -2147,8 +2147,8 @@ def test_gecko_collector_with_location_info(self):
 
     def test_jsonl_collector_with_location_info(self):
         """Test JsonlCollector handles LocationInfo properly."""
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
         run_id = collector.run_id
@@ -2164,10 +2164,10 @@ def test_jsonl_collector_with_location_info(self):
 
         # Should extract lineno from location
         with captured_stdout(), captured_stderr():
-            collector.export(collapsed_out.name)
+            collector.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
@@ -2197,8 +2197,8 @@ def jsonl(obj):
 
     def test_jsonl_collector_with_none_location(self):
         """Test JsonlCollector handles None location (synthetic frames)."""
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
         run_id = collector.run_id
@@ -2216,10 +2216,10 @@ def test_jsonl_collector_with_none_location(self):
 
         # Should handle None location as synthetic frame
         with captured_stdout(), captured_stderr():
-            collector.export(collapsed_out.name)
+            collector.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 9d792b8d6f20ab..a4c7ed857ce7fb 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -238,8 +238,8 @@ def test_jsonl_collector_rspects_skip_idle(self):
         import tempfile
         import json
 
-        collapsed_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, collapsed_out)
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
 
         # Create mock frames with different thread statuses
         class MockThreadInfoWithStatus:
@@ -287,10 +287,10 @@ def __init__(self, thread_id, frame_info, status):
 
         # Should only have functions from running threads (status 0)
         with captured_stdout(), captured_stderr():
-            collector_skip.export(collapsed_out.name)
+            collector_skip.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")
@@ -333,10 +333,10 @@ def jsonl(obj):
 
         # Should have functions from all threads
         with captured_stdout(), captured_stderr():
-            collector_no_skip.export(collapsed_out.name)
+            collector_no_skip.export(jsonl_out.name)
 
         # Check file contents
-        with open(collapsed_out.name, "r") as f:
+        with open(jsonl_out.name, "r") as f:
             content = f.read()
 
         lines = content.strip().split("\n")

From bd9aefe1fd36ed2123257a62d8586a3f5e308c66 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:31:24 +0200
Subject: [PATCH 16/38] nicer reading

---
 .../test_profiling/test_sampling_profiler/test_collectors.py   | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 8432f2ac6de398..e12ea44f566cb4 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2633,8 +2633,6 @@ def test_jsonl_collector_filters_internal_frames(self):
         jsonl_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, jsonl_out)
 
-        collector = JsonlCollector(sample_interval_usec=1000)
-
         frames = [
             MockInterpreterInfo(
                 0,
@@ -2652,6 +2650,7 @@ def test_jsonl_collector_filters_internal_frames(self):
             )
         ]
 
+        collector = JsonlCollector(sample_interval_usec=1000)
         collector.collect(frames)
         collector.export(jsonl_out.name)
 

From 311a4e38b0299dd9a08b9d8084daab8c16cde8a7 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:32:23 +0200
Subject: [PATCH 17/38] fix typo in test name (rspects -> respects)

---
 Lib/test/test_profiling/test_sampling_profiler/test_modes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index a4c7ed857ce7fb..37cb6c3a5c5ab2 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -233,7 +233,7 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
-    def test_jsonl_collector_rspects_skip_idle(self):
+    def test_jsonl_collector_respects_skip_idle(self):
         """Test that frames are actually filtered when skip_idle=True."""
         import tempfile
         import json

From 749a8686b0ff13a09ae8218c6270484cb0670d14 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:37:39 +0200
Subject: [PATCH 18/38] remove commented-out assertions left over from copying

---
 Lib/test/test_profiling/test_sampling_profiler/test_modes.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 37cb6c3a5c5ab2..2bac26c37091b0 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -371,11 +371,6 @@ def jsonl(obj):
         for exp in expected:
             self.assertIn(exp, lines)
 
-        # self.assertIn(active1_key, collector_no_skip.result)
-        # self.assertIn(active2_key, collector_no_skip.result)
-        # self.assertIn(
-        #     idle_key, collector_no_skip.result
-        # )  # Idle thread should be included
 
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):

From 85ce978c53b3864e0fb720fcc0f9d1e101f5b1fb Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:49:39 +0200
Subject: [PATCH 19/38] use Counter for frame tallies, merge chunked writers

---
 Lib/profiling/sampling/jsonl_collector.py | 87 +++++++++++------------
 1 file changed, 40 insertions(+), 47 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 59ab3b865c182c..56539c2a9e2232 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -1,5 +1,6 @@
 """JSONL collector."""
 
+from collections import Counter
 import json
 import uuid
 from itertools import batched
@@ -38,8 +39,8 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         self._frame_to_id = {}
         self._frames = []
 
-        self._frame_self = {}
-        self._frame_cumulative = {}
+        self._frame_self = Counter()
+        self._frame_cumulative = Counter()
         self._samples_total = 0
 
         self._mode = mode
@@ -56,21 +57,39 @@ def process_frames(self, frames, _thread_id, weight=1):
         ]
         leaf_frame_id = frame_ids[0]
 
-        self._frame_self[leaf_frame_id] = (
-            self._frame_self.get(leaf_frame_id, 0) + weight
-        )
+        self._frame_self[leaf_frame_id] += weight
 
         for frame_id in set(frame_ids):
-            self._frame_cumulative[frame_id] = (
-                self._frame_cumulative.get(frame_id, 0) + weight
-            )
+            self._frame_cumulative[frame_id] += weight
 
     def export(self, filename):
         with open(filename, "w", encoding="utf-8") as output:
             self._write_message(output, self._build_meta_record())
-            self._write_chunked_defs(output, "str_def", self._strings)
-            self._write_chunked_defs(output, "frame_def", self._frames)
-            self._write_chunked_agg(output, self._iter_agg_entries())
+            self._write_chunked_records(
+                output,
+                {"type": "str_def", "v": 1, "run_id": self.run_id},
+                "defs",
+                self._strings,
+            )
+            self._write_chunked_records(
+                output,
+                {"type": "frame_def", "v": 1, "run_id": self.run_id},
+                "defs",
+                self._frames,
+            )
+            self._write_chunked_records(
+                output,
+                {
+                    "type": "agg",
+                    "v": 1,
+                    "run_id": self.run_id,
+                    "kind": "frame",
+                    "scope": "final",
+                    "samples_total": self._samples_total,
+                },
+                "entries",
+                self._iter_agg_entries(),
+            )
             self._write_message(output, self._build_end_record())
 
     def _build_meta_record(self):
@@ -171,44 +190,18 @@ def _normalize_export_location(location):
         return normalized
 
     def _iter_agg_entries(self):
-        entries = []
-        for frame_record in self._frames:
-            frame_id = frame_record["frame_id"]
-            entries.append(
-                {
-                    "frame_id": frame_id,
-                    "self": self._frame_self.get(frame_id, 0),
-                    "cumulative": self._frame_cumulative.get(frame_id, 0),
-                }
-            )
-        return entries
-
-    def _write_chunked_defs(self, output, record_type, entries):
-        for chunk in batched(entries, _CHUNK_SIZE):
-            self._write_message(
-                output,
-                {
-                    "type": record_type,
-                    "v": 1,
-                    "run_id": self.run_id,
-                    "defs": chunk,
-                },
-            )
+        return [
+            {
+                "frame_id": frame_record["frame_id"],
+                "self": self._frame_self[frame_record["frame_id"]],
+                "cumulative": self._frame_cumulative[frame_record["frame_id"]],
+            }
+            for frame_record in self._frames
+        ]
 
-    def _write_chunked_agg(self, output, entries):
+    def _write_chunked_records(self, output, base_record, chunk_field, entries):
         for chunk in batched(entries, _CHUNK_SIZE):
-            self._write_message(
-                output,
-                {
-                    "type": "agg",
-                    "v": 1,
-                    "run_id": self.run_id,
-                    "kind": "frame",
-                    "scope": "final",
-                    "samples_total": self._samples_total,
-                    "entries": chunk,
-                },
-            )
+            self._write_message(output, {**base_record, chunk_field: chunk})
 
     @staticmethod
     def _write_message(output, record):

From 820d3b9f85dbda7d653d774f29b9d064517134a4 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:52:11 +0200
Subject: [PATCH 20/38] ruff

---
 Lib/profiling/sampling/jsonl_collector.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 56539c2a9e2232..244501ba446f07 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -199,7 +199,9 @@ def _iter_agg_entries(self):
             for frame_record in self._frames
         ]
 
-    def _write_chunked_records(self, output, base_record, chunk_field, entries):
+    def _write_chunked_records(
+        self, output, base_record, chunk_field, entries
+    ):
         for chunk in batched(entries, _CHUNK_SIZE):
             self._write_message(output, {**base_record, chunk_field: chunk})
 

From aad4b180d71cc02f1d92daefda79e2f14e37de02 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:56:14 +0200
Subject: [PATCH 21/38] future-proof name

---
 Lib/profiling/sampling/jsonl_collector.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 244501ba446f07..7d7b44c8d89407 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -88,7 +88,7 @@ def export(self, filename):
                     "samples_total": self._samples_total,
                 },
                 "entries",
-                self._iter_agg_entries(),
+                self._iter_final_agg_entries(),
             )
             self._write_message(output, self._build_end_record())
 
@@ -189,7 +189,7 @@ def _normalize_export_location(location):
             normalized["end_col"] = end_col_offset
         return normalized
 
-    def _iter_agg_entries(self):
+    def _iter_final_agg_entries(self):
         return [
             {
                 "frame_id": frame_record["frame_id"],

From da3e754fa64f906c75d6937d23e7b0365426a1b7 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 13:56:58 +0200
Subject: [PATCH 22/38] future-proof iter for streaming

---
 Lib/profiling/sampling/jsonl_collector.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 7d7b44c8d89407..1b318573425edf 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -190,14 +190,13 @@ def _normalize_export_location(location):
         return normalized
 
     def _iter_final_agg_entries(self):
-        return [
-            {
-                "frame_id": frame_record["frame_id"],
-                "self": self._frame_self[frame_record["frame_id"]],
-                "cumulative": self._frame_cumulative[frame_record["frame_id"]],
+        for frame_record in self._frames:
+            frame_id = frame_record["frame_id"]
+            yield {
+                "frame_id": frame_id,
+                "self": self._frame_self[frame_id],
+                "cumulative": self._frame_cumulative[frame_id],
             }
-            for frame_record in self._frames
-        ]
 
     def _write_chunked_records(
         self, output, base_record, chunk_field, entries

From cb6ed347142cfb3550f228584eafe98a87099e38 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 14:13:23 +0200
Subject: [PATCH 23/38] truth be told, this should be a layer above

---
 Lib/profiling/sampling/collector.py           |  5 ++-
 Lib/profiling/sampling/jsonl_collector.py     | 42 +++++++------------
 .../test_sampling_profiler/test_collectors.py |  5 +++
 3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py
index 7dc095c6c279bd..dc6eb751b99e15 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -20,13 +20,16 @@ def normalize_location(location):
     """Normalize location to a 4-tuple format.
 
     Args:
-        location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None
+        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
+            an integer line number, or None
 
     Returns:
         tuple: (lineno, end_lineno, col_offset, end_col_offset)
     """
     if location is None:
         return DEFAULT_LOCATION
+    if isinstance(location, int):
+        return (location, location, -1, -1)
     return location
 
 
diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 1b318573425edf..6c8f2bc2fd3135 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -12,6 +12,7 @@
     PROFILING_MODE_GIL,
     PROFILING_MODE_WALL,
 )
+from .collector import normalize_location
 from .stack_collector import StackTraceCollector
 
 
@@ -117,7 +118,7 @@ def _build_end_record(self):
 
     def _get_or_create_frame_id(self, filename, location, funcname):
         synthetic = location is None
-        location_fields = self._normalize_export_location(location)
+        location_fields = self._location_to_export_fields(location)
         func_str_id = self._intern_string(funcname)
         path_str_id = self._intern_string(filename)
 
@@ -160,34 +161,19 @@ def _intern_string(self, value):
         return string_id
 
     @staticmethod
-    def _normalize_export_location(location):
-        if location is None:
-            return {"line": 0}
-
-        if isinstance(location, int):
-            return {"line": max(location, 0)}
-
-        if not isinstance(location, tuple):
-            lineno = getattr(location, "lineno", 0)
-            location = (
-                lineno,
-                getattr(location, "end_lineno", lineno),
-                getattr(location, "col_offset", -1),
-                getattr(location, "end_col_offset", -1),
-            )
+    def _location_to_export_fields(location):
+        lineno, end_lineno, col_offset, end_col_offset = normalize_location(
+            location
+        )
 
-        lineno, end_lineno, col_offset, end_col_offset = location
-        if not isinstance(lineno, int) or lineno <= 0:
-            return {"line": 0}
-
-        normalized = {"line": lineno}
-        if isinstance(end_lineno, int) and end_lineno > 0:
-            normalized["end_line"] = end_lineno
-        if isinstance(col_offset, int) and col_offset >= 0:
-            normalized["col"] = col_offset
-        if isinstance(end_col_offset, int) and end_col_offset >= 0:
-            normalized["end_col"] = end_col_offset
-        return normalized
+        fields = {"line": lineno}
+        if end_lineno > 0:
+            fields["end_line"] = end_lineno
+        if col_offset >= 0:
+            fields["col"] = col_offset
+        if end_col_offset >= 0:
+            fields["end_col"] = end_col_offset
+        return fields
 
     def _iter_final_agg_entries(self):
         for frame_record in self._frames:
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index e12ea44f566cb4..908ecb1464ae5b 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1955,6 +1955,11 @@ def test_extract_lineno_from_none(self):
         """Test extracting lineno from None (synthetic frames)."""
         self.assertEqual(extract_lineno(None), 0)
 
+    def test_normalize_location_with_int(self):
+        """Test normalize_location expands a legacy integer line number."""
+        result = normalize_location(42)
+        self.assertEqual(result, (42, 42, -1, -1))
+
     def test_normalize_location_with_location_info(self):
         """Test normalize_location passes through LocationInfo."""
         loc = LocationInfo(10, 15, 0, 5)

From 5a59e0b5de7205a56d5a165d7b83295a22a9c9fb Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:20:48 +0200
Subject: [PATCH 24/38] helper

---
 .../test_sampling_profiler/test_collectors.py | 255 +++++++++---------
 .../test_sampling_profiler/test_modes.py      | 148 +---------
 2 files changed, 130 insertions(+), 273 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 908ecb1464ae5b..4f1c76de414c5e 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -58,6 +58,25 @@ def find_child_by_name(children, strings, substr):
     return None
 
 
+def _jsonl_tables(records):
+    meta = next(record for record in records if record["type"] == "meta")
+    end = next(record for record in records if record["type"] == "end")
+    agg = next(record for record in records if record["type"] == "agg")
+    str_defs = {
+        item["str_id"]: item["value"]
+        for record in records
+        if record["type"] == "str_def"
+        for item in record["defs"]
+    }
+    frame_defs = [
+        item
+        for record in records
+        if record["type"] == "frame_def"
+        for item in record["defs"]
+    ]
+    return meta, str_defs, frame_defs, agg, end
+
+
 class TestSampleProfilerComponents(unittest.TestCase):
     """Unit tests for individual profiler components."""
 
@@ -1666,14 +1685,12 @@ def test_diff_flamegraph_load_baseline(self):
         self.assertAlmostEqual(cold_node["diff"], -1.0)
         self.assertAlmostEqual(cold_node["diff_pct"], -50.0)
 
-    def test_jsonl_collector_export(self):
+    def test_jsonl_collector_export_exact_output(self):
         jsonl_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(1000)
-        run_id = collector.run_id
-
-        self.assertIsNotNone(run_id)
+        collector.run_id = "run-123"
 
         test_frames1 = [
             MockInterpreterInfo(
@@ -1705,46 +1722,74 @@ def test_jsonl_collector_export(self):
         collector.collect(test_frames2)
         collector.collect(test_frames3)
 
-        with captured_stdout(), captured_stderr():
-            collector.export(jsonl_out.name)
+        collector.export(jsonl_out.name)
 
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
             content = f.read()
 
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "func1"},
-                            {"str_id": 2, "value": "file.py"},
-                            {"str_id": 3, "value": "func2"},
-                            {"str_id": 4, "value": "other_func"},
-                            {"str_id": 5, "value": "other.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 10, "end_line": 10},
-                            {"frame_id": 2, "path_str_id": 2, "func_str_id": 3,
-                             "line": 20, "end_line": 20},
-                            {"frame_id": 3, "path_str_id": 5, "func_str_id": 4,
-                             "line": 5, "end_line": 5}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 3,
-                   "entries": [{"frame_id": 1, "self": 2, "cumulative": 2},
-                               {"frame_id": 2, "self": 0, "cumulative": 2},
-                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 3}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
+        self.assertEqual(
+            content,
+            (
+                '{"type":"meta","v":1,"run_id":"run-123","sample_interval_usec":1000}\n'
+                '{"type":"str_def","v":1,"run_id":"run-123","defs":[{"str_id":1,"value":"func1"},{"str_id":2,"value":"file.py"},{"str_id":3,"value":"func2"},{"str_id":4,"value":"other_func"},{"str_id":5,"value":"other.py"}]}\n'
+                '{"type":"frame_def","v":1,"run_id":"run-123","defs":[{"frame_id":1,"path_str_id":2,"func_str_id":1,"line":10,"end_line":10},{"frame_id":2,"path_str_id":2,"func_str_id":3,"line":20,"end_line":20},{"frame_id":3,"path_str_id":5,"func_str_id":4,"line":5,"end_line":5}]}\n'
+                '{"type":"agg","v":1,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":1,"self":2,"cumulative":2},{"frame_id":2,"self":0,"cumulative":2},{"frame_id":3,"self":1,"cumulative":1}]}\n'
+                '{"type":"end","v":1,"run_id":"run-123","samples_total":3}\n'
+            ),
+        )
+
+    def test_jsonl_collector_skip_idle_filters_threads(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        active_status = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
+        frames = [
+            MockInterpreterInfo(
+                0,
+                [
+                    MockThreadInfo(
+                        1,
+                        [MockFrameInfo("active1.py", 10, "active_func1")],
+                        status=active_status,
+                    ),
+                    MockThreadInfo(
+                        2,
+                        [MockFrameInfo("idle.py", 20, "idle_func")],
+                        status=0,
+                    ),
+                    MockThreadInfo(
+                        3,
+                        [MockFrameInfo("active2.py", 30, "active_func2")],
+                        status=active_status,
+                    ),
+                ],
+            )
+        ]
+
+        def export_summary(skip_idle):
+            collector = JsonlCollector(1000, skip_idle=skip_idle)
+            collector.collect(frames)
+            collector.export(jsonl_out.name)
+
+            with open(jsonl_out.name, "r", encoding="utf-8") as f:
+                records = [json.loads(line) for line in f]
+
+            _, str_defs, frame_defs, agg_record, _ = _jsonl_tables(records)
+            paths = {str_defs[item["path_str_id"]] for item in frame_defs}
+            funcs = {str_defs[item["func_str_id"]] for item in frame_defs}
+            return paths, funcs, agg_record["samples_total"]
+
+        paths, funcs, samples_total = export_summary(skip_idle=True)
+        self.assertEqual(paths, {"active1.py", "active2.py"})
+        self.assertEqual(funcs, {"active_func1", "active_func2"})
+        self.assertEqual(samples_total, 2)
+
+        paths, funcs, samples_total = export_summary(skip_idle=False)
+        self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"})
+        self.assertEqual(
+            funcs, {"active_func1", "idle_func", "active_func2"}
+        )
+        self.assertEqual(samples_total, 3)
 
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
@@ -2156,7 +2201,6 @@ def test_jsonl_collector_with_location_info(self):
         self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
-        run_id = collector.run_id
 
         # Frame with LocationInfo
         frame = MockFrameInfo("test.py", 42, "my_function")
@@ -2167,38 +2211,28 @@ def test_jsonl_collector_with_location_info(self):
         ]
         collector.collect(frames)
 
-        # Should extract lineno from location
-        with captured_stdout(), captured_stderr():
-            collector.export(jsonl_out.name)
+        collector.export(jsonl_out.name)
 
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
 
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "my_function"},
-                            {"str_id": 2, "value": "test.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 42, "end_line": 42}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 1,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 1}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
+        meta, str_defs, frame_defs, agg, end = _jsonl_tables(records)
+        self.assertEqual(meta["sample_interval_usec"], 1000)
+        self.assertEqual(agg["samples_total"], 1)
+        self.assertEqual(end["samples_total"], 1)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "test.py")
+        self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "my_function")
+        self.assertEqual(
+            frame_defs[0],
+            {
+                "frame_id": 1,
+                "path_str_id": frame_defs[0]["path_str_id"],
+                "func_str_id": frame_defs[0]["func_str_id"],
+                "line": 42,
+                "end_line": 42,
+            },
+        )
 
     def test_jsonl_collector_with_none_location(self):
         """Test JsonlCollector handles None location (synthetic frames)."""
@@ -2206,7 +2240,6 @@ def test_jsonl_collector_with_none_location(self):
         self.addCleanup(close_and_unlink, jsonl_out)
 
         collector = JsonlCollector(sample_interval_usec=1000)
-        run_id = collector.run_id
 
         # Create frame with None location (like GC frame)
         frame = MockFrameInfo("~", 0, "<GC>")
@@ -2219,38 +2252,28 @@ def test_jsonl_collector_with_none_location(self):
         ]
         collector.collect(frames)
 
-        # Should handle None location as synthetic frame
-        with captured_stdout(), captured_stderr():
-            collector.export(jsonl_out.name)
-
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
-
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
+        collector.export(jsonl_out.name)
 
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "<GC>"},
-                            {"str_id": 2, "value": "~"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 0, "synthetic": True}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 1,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 1}),
-        ]
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
 
-        for exp in expected:
-            self.assertIn(exp, lines)
+        meta, str_defs, frame_defs, agg, end = _jsonl_tables(records)
+        self.assertEqual(meta["sample_interval_usec"], 1000)
+        self.assertEqual(agg["samples_total"], 1)
+        self.assertEqual(end["samples_total"], 1)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "~")
+        self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "<GC>")
+        self.assertEqual(
+            frame_defs[0],
+            {
+                "frame_id": 1,
+                "path_str_id": frame_defs[0]["path_str_id"],
+                "func_str_id": frame_defs[0]["func_str_id"],
+                "line": 0,
+                "synthetic": True,
+            },
+        )
 
 
 class TestOpcodeHandling(unittest.TestCase):
@@ -2484,18 +2507,7 @@ def test_jsonl_collector_frame_format(self):
         with open(f.name, "r", encoding="utf-8") as fp:
             records = [json.loads(line) for line in fp]
 
-        str_defs = {
-            item["str_id"]: item["value"]
-            for record in records
-            if record["type"] == "str_def"
-            for item in record["defs"]
-        }
-        frame_defs = [
-            item
-            for record in records
-            if record["type"] == "frame_def"
-            for item in record["defs"]
-        ]
+        _, str_defs, frame_defs, _, _ = _jsonl_tables(records)
 
         self.assertEqual(len(frame_defs), 3)
 
@@ -2662,18 +2674,7 @@ def test_jsonl_collector_filters_internal_frames(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        str_defs = {
-            item["str_id"]: item["value"]
-            for record in records
-            if record["type"] == "str_def"
-            for item in record["defs"]
-        }
-        frame_defs = [
-            item
-            for record in records
-            if record["type"] == "frame_def"
-            for item in record["defs"]
-        ]
+        _, str_defs, frame_defs, _, _ = _jsonl_tables(records)
 
         paths = {str_defs[item["path_str_id"]] for item in frame_defs}
 
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 2bac26c37091b0..6cd636593e3db1 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -9,7 +9,6 @@
     import profiling.sampling
     import profiling.sampling.sample
     from profiling.sampling.pstats_collector import PstatsCollector
-    from profiling.sampling.jsonl_collector import JsonlCollector
     from profiling.sampling.cli import main, _parse_mode
     from profiling.sampling.constants import PROFILING_MODE_EXCEPTION
     from _remote_debugging import (
@@ -21,13 +20,9 @@
         "Test only runs when _remote_debugging is available"
     )
 
-from test.support import (
-    captured_stdout,
-    captured_stderr,
-    requires_remote_subprocess_debugging,
-)
+from test.support import requires_remote_subprocess_debugging
 
-from .helpers import close_and_unlink, test_subprocess
+from .helpers import test_subprocess
 from .mocks import MockFrameInfo, MockInterpreterInfo
 
 
@@ -233,145 +228,6 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
-    def test_jsonl_collector_respects_skip_idle(self):
-        """Test that frames are actually filtered when skip_idle=True."""
-        import tempfile
-        import json
-
-        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
-        self.addCleanup(close_and_unlink, jsonl_out)
-
-        # Create mock frames with different thread statuses
-        class MockThreadInfoWithStatus:
-            def __init__(self, thread_id, frame_info, status):
-                self.thread_id = thread_id
-                self.frame_info = frame_info
-                self.status = status
-
-        # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
-        ACTIVE_STATUS = (
-            THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU
-        )  # Has GIL and on CPU
-        IDLE_STATUS = 0  # Neither has GIL nor on CPU
-
-        test_frames = [
-            MockInterpreterInfo(
-                0,
-                [
-                    MockThreadInfoWithStatus(
-                        1,
-                        [MockFrameInfo("active1.py", 10, "active_func1")],
-                        ACTIVE_STATUS,
-                    ),
-                    MockThreadInfoWithStatus(
-                        2,
-                        [MockFrameInfo("idle.py", 20, "idle_func")],
-                        IDLE_STATUS,
-                    ),
-                    MockThreadInfoWithStatus(
-                        3,
-                        [MockFrameInfo("active2.py", 30, "active_func2")],
-                        ACTIVE_STATUS,
-                    ),
-                ],
-            )
-        ]
-
-        # Test with skip_idle=True - should only process running threads
-        collector_skip = JsonlCollector(
-            sample_interval_usec=1000, skip_idle=True
-        )
-        collector_skip.collect(test_frames)
-
-        run_id = collector_skip.run_id
-
-        # Should only have functions from running threads (status 0)
-        with captured_stdout(), captured_stderr():
-            collector_skip.export(jsonl_out.name)
-
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
-
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        def jsonl(obj):
-            return json.dumps(obj, separators=(",", ":"))
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "active_func1"},
-                            {"str_id": 2, "value": "active1.py"},
-                            {"str_id": 3, "value": "active_func2"},
-                            {"str_id": 4, "value": "active2.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 10, "end_line": 10},
-                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
-                             "line": 30, "end_line": 30}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 2,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
-                               {"frame_id": 2, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 2}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
-
-        # Test with skip_idle=False - should process all threads
-        collector_no_skip = JsonlCollector(
-            sample_interval_usec=1000, skip_idle=False
-        )
-        collector_no_skip.collect(test_frames)
-
-        run_id = collector_no_skip.run_id
-
-        # Should have functions from all threads
-        with captured_stdout(), captured_stderr():
-            collector_no_skip.export(jsonl_out.name)
-
-        # Check file contents
-        with open(jsonl_out.name, "r") as f:
-            content = f.read()
-
-        lines = content.strip().split("\n")
-        self.assertEqual(len(lines), 5)
-
-        expected = [
-            jsonl({"type": "meta", "v": 1, "run_id": run_id,
-                   "sample_interval_usec": 1000}),
-            jsonl({"type": "str_def", "v": 1, "run_id": run_id,
-                   "defs": [{"str_id": 1, "value": "active_func1"},
-                            {"str_id": 2, "value": "active1.py"},
-                            {"str_id": 3, "value": "idle_func"},
-                            {"str_id": 4, "value": "idle.py"},
-                            {"str_id": 5, "value": "active_func2"},
-                            {"str_id": 6, "value": "active2.py"}]}),
-            jsonl({"type": "frame_def", "v": 1, "run_id": run_id,
-                   "defs": [{"frame_id": 1, "path_str_id": 2, "func_str_id": 1,
-                             "line": 10, "end_line": 10},
-                            {"frame_id": 2, "path_str_id": 4, "func_str_id": 3,
-                             "line": 20, "end_line": 20},
-                            {"frame_id": 3, "path_str_id": 6, "func_str_id": 5,
-                             "line": 30, "end_line": 30}]}),
-            jsonl({"type": "agg", "v": 1, "run_id": run_id,
-                   "kind": "frame", "scope": "final", "samples_total": 3,
-                   "entries": [{"frame_id": 1, "self": 1, "cumulative": 1},
-                               {"frame_id": 2, "self": 1, "cumulative": 1},
-                               {"frame_id": 3, "self": 1, "cumulative": 1}]}),
-            jsonl({"type": "end", "v": 1, "run_id": run_id,
-                   "samples_total": 3}),
-        ]
-
-        for exp in expected:
-            self.assertIn(exp, lines)
-
-
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):
     """Test GIL mode filtering functionality (--mode=gil)."""

From 192e54bd5415d8242068c5650a1e27795dadc285 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:31:15 +0200
Subject: [PATCH 25/38] reorder

---
 Lib/profiling/sampling/jsonl_collector.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 6c8f2bc2fd3135..372205a566afc6 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -116,6 +116,15 @@ def _build_end_record(self):
 
         return record
 
+    def _iter_final_agg_entries(self):
+        for frame_record in self._frames:
+            frame_id = frame_record["frame_id"]
+            yield {
+                "frame_id": frame_id,
+                "self": self._frame_self[frame_id],
+                "cumulative": self._frame_cumulative[frame_id],
+            }
+
     def _get_or_create_frame_id(self, filename, location, funcname):
         synthetic = location is None
         location_fields = self._location_to_export_fields(location)
@@ -175,15 +184,6 @@ def _location_to_export_fields(location):
             fields["end_col"] = end_col_offset
         return fields
 
-    def _iter_final_agg_entries(self):
-        for frame_record in self._frames:
-            frame_id = frame_record["frame_id"]
-            yield {
-                "frame_id": frame_id,
-                "self": self._frame_self[frame_id],
-                "cumulative": self._frame_cumulative[frame_id],
-            }
-
     def _write_chunked_records(
         self, output, base_record, chunk_field, entries
     ):

From 3189a8fe45b31a2949b27cde2b7f4ae6d2f06cd5 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:46:44 +0200
Subject: [PATCH 26/38] eh, just copy from heatmap

---
 Lib/profiling/sampling/jsonl_collector.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 372205a566afc6..146075b00b121c 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -43,6 +43,7 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         self._frame_self = Counter()
         self._frame_cumulative = Counter()
         self._samples_total = 0
+        self._seen_frame_ids = set()
 
         self._mode = mode
 
@@ -51,17 +52,21 @@ def process_frames(self, frames, _thread_id, weight=1):
             return
 
         self._samples_total += weight
+        self._seen_frame_ids.clear()
 
-        frame_ids = [
-            self._get_or_create_frame_id(filename, location, funcname)
-            for filename, location, funcname, _opcode in frames
-        ]
-        leaf_frame_id = frame_ids[0]
+        for i, (filename, location, funcname, _opcode) in enumerate(frames):
+            frame_id = self._get_or_create_frame_id(filename, location, funcname)
+            is_leaf = (i == 0)
+            count_cumulative = frame_id not in self._seen_frame_ids
 
-        self._frame_self[leaf_frame_id] += weight
+            if count_cumulative:
+                self._seen_frame_ids.add(frame_id)
 
-        for frame_id in set(frame_ids):
-            self._frame_cumulative[frame_id] += weight
+            if is_leaf:
+                self._frame_self[frame_id] += weight
+
+            if count_cumulative:
+                self._frame_cumulative[frame_id] += weight
 
     def export(self, filename):
         with open(filename, "w", encoding="utf-8") as output:

From 935779f072ede6cefa1759ae30d224347819f699 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:54:14 +0200
Subject: [PATCH 27/38] smaller chunk; matter of taste

---
 Lib/profiling/sampling/jsonl_collector.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 146075b00b121c..12d8e4b3e2e77b 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -16,7 +16,7 @@
 from .stack_collector import StackTraceCollector
 
 
-_CHUNK_SIZE = 1000
+_CHUNK_SIZE = 256
 
 _MODE_NAMES = {
     PROFILING_MODE_WALL: "wall",

From e3d8aff3de38a327ff3bab9f8b3b3070232f1764 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 15:54:27 +0200
Subject: [PATCH 28/38] test actual chunking

---
 .../test_sampling_profiler/test_collectors.py | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 4f1c76de414c5e..2988a2efe21eaa 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1791,6 +1791,49 @@ def export_summary(skip_idle):
         )
         self.assertEqual(samples_total, 3)
 
+    def test_jsonl_collector_splits_large_exports_into_chunks(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+
+        for i in range(257):
+            collector.collect(
+                [
+                    MockInterpreterInfo(
+                        0,
+                        [
+                            MockThreadInfo(
+                                1,
+                                [MockFrameInfo(f"file{i}.py", i + 1, f"func{i}")],
+                            )
+                        ],
+                    )
+                ]
+            )
+
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        run_ids = {record["run_id"] for record in records}
+        self.assertEqual(len(run_ids), 1)
+        self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")
+
+        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(records)
+        str_chunks = [record for record in records if record["type"] == "str_def"]
+        frame_chunks = [record for record in records if record["type"] == "frame_def"]
+        agg_chunks = [record for record in records if record["type"] == "agg"]
+
+        self.assertEqual([len(record["defs"]) for record in str_chunks], [256, 256, 2])
+        self.assertEqual([len(record["defs"]) for record in frame_chunks], [256, 1])
+        self.assertEqual([len(record["entries"]) for record in agg_chunks], [256, 1])
+        self.assertEqual(len(str_defs), 514)
+        self.assertEqual(len(frame_defs), 257)
+        self.assertEqual(agg_record["samples_total"], 257)
+        self.assertEqual(end_record["samples_total"], 257)
+
 
 class TestRecursiveFunctionHandling(unittest.TestCase):
     """Tests for correct handling of recursive functions in cumulative stats."""

From d37f07a26cd36aa854927a778d8f3c7cdecc7015 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:00:29 +0200
Subject: [PATCH 29/38] test edge cases

---
 .../test_sampling_profiler/test_collectors.py | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 2988a2efe21eaa..a2e1f85c45d680 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1738,6 +1738,80 @@ def test_jsonl_collector_export_exact_output(self):
             ),
         )
 
+    def test_jsonl_collector_export_includes_mode_in_meta(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000, mode=PROFILING_MODE_CPU)
+        collector.collect(
+            [
+                MockInterpreterInfo(
+                    0,
+                    [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func")])],
+                )
+            ]
+        )
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        meta_record = next(record for record in records if record["type"] == "meta")
+        self.assertEqual(meta_record["mode"], "cpu")
+
+    def test_jsonl_collector_export_empty_profile(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        collector.run_id = "run-123"
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        self.assertEqual([record["type"] for record in records], ["meta", "end"])
+        self.assertEqual(records[0]["sample_interval_usec"], 1000)
+        self.assertEqual(records[0]["run_id"], "run-123")
+        self.assertEqual(records[1]["samples_total"], 0)
+        self.assertEqual(records[1]["run_id"], "run-123")
+
+    def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
+        jsonl_out = tempfile.NamedTemporaryFile(delete=False)
+        self.addCleanup(close_and_unlink, jsonl_out)
+
+        collector = JsonlCollector(1000)
+        collector.collect(
+            [
+                MockInterpreterInfo(
+                    0,
+                    [
+                        MockThreadInfo(
+                            1,
+                            [
+                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                            ],
+                        )
+                    ],
+                )
+            ]
+        )
+        collector.export(jsonl_out.name)
+
+        with open(jsonl_out.name, "r", encoding="utf-8") as f:
+            records = [json.loads(line) for line in f]
+
+        _, _, frame_defs, agg_record, end_record = _jsonl_tables(records)
+        self.assertEqual(len(frame_defs), 1)
+        self.assertEqual(
+            agg_record["entries"],
+            [{"frame_id": frame_defs[0]["frame_id"], "self": 1, "cumulative": 1}],
+        )
+        self.assertEqual(agg_record["samples_total"], 1)
+        self.assertEqual(end_record["samples_total"], 1)
+
     def test_jsonl_collector_skip_idle_filters_threads(self):
         jsonl_out = tempfile.NamedTemporaryFile(delete=False)
         self.addCleanup(close_and_unlink, jsonl_out)

From aaaa9722d4ced736842227882ee43bd47cbc3b96 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:05:30 +0200
Subject: [PATCH 30/38] ruff

---
 Lib/profiling/sampling/jsonl_collector.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index 12d8e4b3e2e77b..a1d37df85c2672 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -55,8 +55,10 @@ def process_frames(self, frames, _thread_id, weight=1):
         self._seen_frame_ids.clear()
 
         for i, (filename, location, funcname, _opcode) in enumerate(frames):
-            frame_id = self._get_or_create_frame_id(filename, location, funcname)
-            is_leaf = (i == 0)
+            frame_id = self._get_or_create_frame_id(
+                filename, location, funcname
+            )
+            is_leaf = i == 0
             count_cumulative = frame_id not in self._seen_frame_ids
 
             if count_cumulative:

From a9b6ccd58ddfa464f130bcd27ec53f43163eb1e5 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:16:37 +0200
Subject: [PATCH 31/38] match pep8

---
 Lib/test/test_profiling/test_sampling_profiler/test_modes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
index 6cd636593e3db1..0b38fb4ad4bcf6 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@@ -228,6 +228,7 @@ def test_cpu_mode_with_no_samples(self):
         self.assertIn("No samples were collected", output)
         self.assertIn("CPU mode", output)
 
+
 @requires_remote_subprocess_debugging()
 class TestGilModeFiltering(unittest.TestCase):
     """Test GIL mode filtering functionality (--mode=gil)."""

From 4fb3ade939080abaf0239ec9df7e6d43afd3b0af Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:44:27 +0200
Subject: [PATCH 32/38] style

---
 Lib/profiling/sampling/binary_reader.py       |   2 +-
 .../test_sampling_profiler/test_collectors.py | 124 +++++++++++++-----
 2 files changed, 94 insertions(+), 32 deletions(-)

diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py
index 8d1d8eef9155eb..a29dad91ae339d 100644
--- a/Lib/profiling/sampling/binary_reader.py
+++ b/Lib/profiling/sampling/binary_reader.py
@@ -118,7 +118,7 @@ def convert_binary_to_format(input_file, output_file, output_format,
             collector = PstatsCollector(interval)
         elif output_format == 'gecko':
             collector = GeckoCollector(interval)
-        elif output_format == 'jsonl':
+        elif output_format == "jsonl":
             collector = JsonlCollector(interval)
         else:
             raise ValueError(f"Unknown output format: {output_format}")
diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index a2e1f85c45d680..915468141a9217 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -1697,7 +1697,11 @@ def test_jsonl_collector_export_exact_output(self):
                 0,
                 [
                     MockThreadInfo(
-                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                        1,
+                        [
+                            MockFrameInfo("file.py", 10, "func1"),
+                            MockFrameInfo("file.py", 20, "func2"),
+                        ],
                     )
                 ],
             )
@@ -1707,14 +1711,23 @@ def test_jsonl_collector_export_exact_output(self):
                 0,
                 [
                     MockThreadInfo(
-                        1, [MockFrameInfo("file.py", 10, "func1"), MockFrameInfo("file.py", 20, "func2")]
+                        1,
+                        [
+                            MockFrameInfo("file.py", 10, "func1"),
+                            MockFrameInfo("file.py", 20, "func2"),
+                        ],
                     )
                 ],
             )
         ]  # Same stack
         test_frames3 = [
             MockInterpreterInfo(
-                0, [MockThreadInfo(1, [MockFrameInfo("other.py", 5, "other_func")])]
+                0,
+                [
+                    MockThreadInfo(
+                        1, [MockFrameInfo("other.py", 5, "other_func")]
+                    )
+                ],
             )
         ]
 
@@ -1747,7 +1760,11 @@ def test_jsonl_collector_export_includes_mode_in_meta(self):
             [
                 MockInterpreterInfo(
                     0,
-                    [MockThreadInfo(1, [MockFrameInfo("file.py", 10, "func")])],
+                    [
+                        MockThreadInfo(
+                            1, [MockFrameInfo("file.py", 10, "func")]
+                        )
+                    ],
                 )
             ]
         )
@@ -1756,7 +1773,9 @@ def test_jsonl_collector_export_includes_mode_in_meta(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        meta_record = next(record for record in records if record["type"] == "meta")
+        meta_record = next(
+            record for record in records if record["type"] == "meta"
+        )
         self.assertEqual(meta_record["mode"], "cpu")
 
     def test_jsonl_collector_export_empty_profile(self):
@@ -1770,7 +1789,9 @@ def test_jsonl_collector_export_empty_profile(self):
         with open(jsonl_out.name, "r", encoding="utf-8") as f:
             records = [json.loads(line) for line in f]
 
-        self.assertEqual([record["type"] for record in records], ["meta", "end"])
+        self.assertEqual(
+            [record["type"] for record in records], ["meta", "end"]
+        )
         self.assertEqual(records[0]["sample_interval_usec"], 1000)
         self.assertEqual(records[0]["run_id"], "run-123")
         self.assertEqual(records[1]["samples_total"], 0)
@@ -1789,9 +1810,15 @@ def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
                         MockThreadInfo(
                             1,
                             [
-                                MockFrameInfo("recursive.py", 10, "recursive_func"),
-                                MockFrameInfo("recursive.py", 10, "recursive_func"),
-                                MockFrameInfo("recursive.py", 10, "recursive_func"),
+                                MockFrameInfo(
+                                    "recursive.py", 10, "recursive_func"
+                                ),
+                                MockFrameInfo(
+                                    "recursive.py", 10, "recursive_func"
+                                ),
+                                MockFrameInfo(
+                                    "recursive.py", 10, "recursive_func"
+                                ),
                             ],
                         )
                     ],
@@ -1807,7 +1834,13 @@ def test_jsonl_collector_recursive_frames_counted_once_per_sample(self):
         self.assertEqual(len(frame_defs), 1)
         self.assertEqual(
             agg_record["entries"],
-            [{"frame_id": frame_defs[0]["frame_id"], "self": 1, "cumulative": 1}],
+            [
+                {
+                    "frame_id": frame_defs[0]["frame_id"],
+                    "self": 1,
+                    "cumulative": 1,
+                }
+            ],
         )
         self.assertEqual(agg_record["samples_total"], 1)
         self.assertEqual(end_record["samples_total"], 1)
@@ -1860,9 +1893,7 @@ def export_summary(skip_idle):
 
         paths, funcs, samples_total = export_summary(skip_idle=False)
         self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"})
-        self.assertEqual(
-            funcs, {"active_func1", "idle_func", "active_func2"}
-        )
+        self.assertEqual(funcs, {"active_func1", "idle_func", "active_func2"})
         self.assertEqual(samples_total, 3)
 
     def test_jsonl_collector_splits_large_exports_into_chunks(self):
@@ -1879,7 +1910,11 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
                         [
                             MockThreadInfo(
                                 1,
-                                [MockFrameInfo(f"file{i}.py", i + 1, f"func{i}")],
+                                [
+                                    MockFrameInfo(
+                                        f"file{i}.py", i + 1, f"func{i}"
+                                    )
+                                ],
                             )
                         ],
                     )
@@ -1895,14 +1930,26 @@ def test_jsonl_collector_splits_large_exports_into_chunks(self):
         self.assertEqual(len(run_ids), 1)
         self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$")
 
-        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(records)
-        str_chunks = [record for record in records if record["type"] == "str_def"]
-        frame_chunks = [record for record in records if record["type"] == "frame_def"]
+        _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables(
+            records
+        )
+        str_chunks = [
+            record for record in records if record["type"] == "str_def"
+        ]
+        frame_chunks = [
+            record for record in records if record["type"] == "frame_def"
+        ]
         agg_chunks = [record for record in records if record["type"] == "agg"]
 
-        self.assertEqual([len(record["defs"]) for record in str_chunks], [256, 256, 2])
-        self.assertEqual([len(record["defs"]) for record in frame_chunks], [256, 1])
-        self.assertEqual([len(record["entries"]) for record in agg_chunks], [256, 1])
+        self.assertEqual(
+            [len(record["defs"]) for record in str_chunks], [256, 256, 2]
+        )
+        self.assertEqual(
+            [len(record["defs"]) for record in frame_chunks], [256, 1]
+        )
+        self.assertEqual(
+            [len(record["entries"]) for record in agg_chunks], [256, 1]
+        )
         self.assertEqual(len(str_defs), 514)
         self.assertEqual(len(frame_defs), 257)
         self.assertEqual(agg_record["samples_total"], 257)
@@ -2071,7 +2118,9 @@ def test_pstats_collector_cumulative_percentage_cannot_exceed_100(self):
         cumulative_calls = stats[1]
         self.assertEqual(cumulative_calls, 10)
 
-    def test_pstats_collector_different_lines_same_function_counted_separately(self):
+    def test_pstats_collector_different_lines_same_function_counted_separately(
+        self,
+    ):
         """Test that different line numbers in same function are tracked separately."""
         collector = PstatsCollector(sample_interval_usec=1000)
 
@@ -2278,8 +2327,7 @@ def test_flamegraph_collector_with_location_info(self):
         frame = MockFrameInfo("app.py", 100, "process_data")
         frames = [
             MockInterpreterInfo(
-                0,
-                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)
@@ -2287,8 +2335,15 @@ def test_flamegraph_collector_with_location_info(self):
         data = collector._convert_to_flamegraph_format()
         # Verify the function name includes lineno from location
         strings = data.get("strings", [])
-        name_found = any("process_data" in s and "100" in s for s in strings if isinstance(s, str))
-        self.assertTrue(name_found, f"Expected to find 'process_data' with line 100 in {strings}")
+        name_found = any(
+            "process_data" in s and "100" in s
+            for s in strings
+            if isinstance(s, str)
+        )
+        self.assertTrue(
+            name_found,
+            f"Expected to find 'process_data' with line 100 in {strings}",
+        )
 
     def test_gecko_collector_with_location_info(self):
         """Test GeckoCollector handles LocationInfo properly."""
@@ -2297,8 +2352,7 @@ def test_gecko_collector_with_location_info(self):
         frame = MockFrameInfo("server.py", 50, "handle_request")
         frames = [
             MockInterpreterInfo(
-                0,
-                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)
@@ -2561,8 +2615,12 @@ def _make_sample_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 100, "main", opcode=90),
-                            MockFrameInfo("utils.py", 50, "helper", opcode=100),
-                            MockFrameInfo("lib.py", 25, "process", opcode=None),
+                            MockFrameInfo(
+                                "utils.py", 50, "helper", opcode=100
+                            ),
+                            MockFrameInfo(
+                                "lib.py", 25, "process", opcode=None
+                            ),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )
@@ -2720,7 +2778,9 @@ def test_flamegraph_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo(
+                                "/lib/_sync_coordinator.py", 100, "main"
+                            ),
                             MockFrameInfo("<frozen runpy>", 87, "_run_code"),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
@@ -2748,7 +2808,9 @@ def test_collapsed_stack_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
+                            MockFrameInfo(
+                                "/lib/_sync_coordinator.py", 100, "main"
+                            ),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )

From a0decb5d8b34072fdf8f70cd4276ace9b78e7380 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 16:49:26 +0200
Subject: [PATCH 33/38] too defensive

---
 Lib/profiling/sampling/jsonl_collector.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py
index a1d37df85c2672..187c4175da6816 100644
--- a/Lib/profiling/sampling/jsonl_collector.py
+++ b/Lib/profiling/sampling/jsonl_collector.py
@@ -48,9 +48,6 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None):
         self._mode = mode
 
     def process_frames(self, frames, _thread_id, weight=1):
-        if not frames:
-            return
-
         self._samples_total += weight
         self._seen_frame_ids.clear()
 

From 5f1704b87d756066d94d94fd7c2f861c107b40ac Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:16:15 +0200
Subject: [PATCH 34/38] too many style changes

---
 .../test_sampling_profiler/test_collectors.py | 21 +++++++------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 915468141a9217..3c2ce8c66f7570 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2118,9 +2118,7 @@ def test_pstats_collector_cumulative_percentage_cannot_exceed_100(self):
         cumulative_calls = stats[1]
         self.assertEqual(cumulative_calls, 10)
 
-    def test_pstats_collector_different_lines_same_function_counted_separately(
-        self,
-    ):
+    def test_pstats_collector_different_lines_same_function_counted_separately(self):
         """Test that different line numbers in same function are tracked separately."""
         collector = PstatsCollector(sample_interval_usec=1000)
 
@@ -2327,7 +2325,8 @@ def test_flamegraph_collector_with_location_info(self):
         frame = MockFrameInfo("app.py", 100, "process_data")
         frames = [
             MockInterpreterInfo(
-                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0,
+                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)
@@ -2335,15 +2334,8 @@ def test_flamegraph_collector_with_location_info(self):
         data = collector._convert_to_flamegraph_format()
         # Verify the function name includes lineno from location
         strings = data.get("strings", [])
-        name_found = any(
-            "process_data" in s and "100" in s
-            for s in strings
-            if isinstance(s, str)
-        )
-        self.assertTrue(
-            name_found,
-            f"Expected to find 'process_data' with line 100 in {strings}",
-        )
+        name_found = any("process_data" in s and "100" in s for s in strings if isinstance(s, str))
+        self.assertTrue(name_found, f"Expected to find 'process_data' with line 100 in {strings}")
 
     def test_gecko_collector_with_location_info(self):
         """Test GeckoCollector handles LocationInfo properly."""
@@ -2352,7 +2344,8 @@ def test_gecko_collector_with_location_info(self):
         frame = MockFrameInfo("server.py", 50, "handle_request")
         frames = [
             MockInterpreterInfo(
-                0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
+                0,
+                [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)]
             )
         ]
         collector.collect(frames)

From f2a21fb0108e0d92e96dd0768236cebd4d005cce Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:18:14 +0200
Subject: [PATCH 35/38] less style

---
 .../test_sampling_profiler/test_collectors.py        | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 3c2ce8c66f7570..833800c163c146 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2608,12 +2608,8 @@ def _make_sample_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 100, "main", opcode=90),
-                            MockFrameInfo(
-                                "utils.py", 50, "helper", opcode=100
-                            ),
-                            MockFrameInfo(
-                                "lib.py", 25, "process", opcode=None
-                            ),
+                            MockFrameInfo("utils.py", 50, "helper", opcode=100),
+                            MockFrameInfo("lib.py", 25, "process", opcode=None),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )
@@ -2801,9 +2797,7 @@ def test_collapsed_stack_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo(
-                                "/lib/_sync_coordinator.py", 100, "main"
-                            ),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
                         ],
                         status=THREAD_STATUS_HAS_GIL,
                     )

From 15b07badef5755278f0ea57fe28692423498c7e6 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:20:22 +0200
Subject: [PATCH 36/38] ha! even less style...

---
 .../test_profiling/test_sampling_profiler/test_collectors.py  | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
index 833800c163c146..f96304b1f3443a 100644
--- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py
@@ -2767,9 +2767,7 @@ def test_flamegraph_collector_filters_internal_frames(self):
                         1,
                         [
                             MockFrameInfo("app.py", 50, "run"),
-                            MockFrameInfo(
-                                "/lib/_sync_coordinator.py", 100, "main"
-                            ),
+                            MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"),
                             MockFrameInfo("<frozen runpy>", 87, "_run_code"),
                         ],
                         status=THREAD_STATUS_HAS_GIL,

From 148f4e21d4c27d9e4c25e46d026ad4f94061e236 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:33:18 +0200
Subject: [PATCH 37/38] news

---
 .../Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst     | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst

diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
new file mode 100644
index 00000000000000..d2d7e0d98d158b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -0,0 +1,3 @@
+The ``profiling.sampling`` module now supports JSONL output format via
+`--jsonl`. Each run emits newline-delimited JSON records suitable for
+streaming or agents.

From 69c576826256dc48472be37d6c93e53ba1628889 Mon Sep 17 00:00:00 2001
From: maurycy <5383+maurycy@users.noreply.github.com>
Date: Tue, 31 Mar 2026 17:33:34 +0200
Subject: [PATCH 38/38] news: proper formatting

---
 .../next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
index d2d7e0d98d158b..d270cc14288d8a 100644
--- a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
+++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst
@@ -1,3 +1,3 @@
 The ``profiling.sampling`` module now supports JSONL output format via
-`--jsonl`. Each run emits newline-delimited JSON records suitable for
+``--jsonl``. Each run emits newline-delimited JSON records suitable for
 streaming or agents.