diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py index 6a0bb5e5c2f387..71579a3903253e 100644 --- a/Lib/profiling/sampling/__init__.py +++ b/Lib/profiling/sampling/__init__.py @@ -9,6 +9,15 @@ from .stack_collector import CollapsedStackCollector from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector +from .jsonl_collector import JsonlCollector from .string_table import StringTable -__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable") +__all__ = ( + "Collector", + "PstatsCollector", + "CollapsedStackCollector", + "HeatmapCollector", + "GeckoCollector", + "JsonlCollector", + "StringTable", +) diff --git a/Lib/profiling/sampling/binary_reader.py b/Lib/profiling/sampling/binary_reader.py index a11be3652597a6..a29dad91ae339d 100644 --- a/Lib/profiling/sampling/binary_reader.py +++ b/Lib/profiling/sampling/binary_reader.py @@ -4,6 +4,7 @@ from .gecko_collector import GeckoCollector from .stack_collector import FlamegraphCollector, CollapsedStackCollector +from .jsonl_collector import JsonlCollector from .pstats_collector import PstatsCollector @@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format, collector = PstatsCollector(interval) elif output_format == 'gecko': collector = GeckoCollector(interval) + elif output_format == "jsonl": + collector = JsonlCollector(interval) else: raise ValueError(f"Unknown output format: {output_format}") diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index e22ab158d6a94d..9ea11ce03872ec 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -19,6 +19,7 @@ from .stack_collector import CollapsedStackCollector, FlamegraphCollector, DiffFlamegraphCollector from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector +from .jsonl_collector import JsonlCollector from .binary_collector 
import BinaryCollector from .binary_reader import BinaryReader from .constants import ( @@ -95,6 +96,7 @@ def __call__(self, parser, namespace, values, option_string=None): "diff_flamegraph": "html", "gecko": "json", "heatmap": "html", + "jsonl": "jsonl", "binary": "bin", } @@ -105,6 +107,7 @@ def __call__(self, parser, namespace, values, option_string=None): "diff_flamegraph": DiffFlamegraphCollector, "gecko": GeckoCollector, "heatmap": HeatmapCollector, + "jsonl": JsonlCollector, "binary": BinaryCollector, } @@ -482,6 +485,13 @@ def _add_format_options(parser, include_compression=True, include_binary=True): action=DiffFlamegraphAction, help="Generate differential flamegraph comparing current profile to BASELINE binary file", ) + format_group.add_argument( + "--jsonl", + action="store_const", + const="jsonl", + dest="format", + help="Generate JSONL snapshot output for external consumers", + ) if include_binary: format_group.add_argument( "--binary", @@ -560,15 +570,17 @@ def _sort_to_mode(sort_choice): return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False, - output_file=None, compression='auto', diff_baseline=None): + mode=None, output_file=None, compression='auto', diff_baseline=None): """Create the appropriate collector based on format type. 
Args: - format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary', 'diff_flamegraph') + format_type: The output format ('pstats', 'collapsed', 'flamegraph', + 'gecko', 'heatmap', 'jsonl', 'binary', 'diff_flamegraph') sample_interval_usec: Sampling interval in microseconds skip_idle: Whether to skip idle samples opcodes: Whether to collect opcode information (only used by gecko format for creating interval markers in Firefox Profiler) + mode: Profiling mode for collectors that expose it in metadata output_file: Output file path (required for binary format) compression: Compression type for binary format ('auto', 'zstd', 'none') diff_baseline: Path to baseline binary file for differential flamegraph @@ -604,6 +616,11 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals skip_idle = False return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes) + if format_type == "jsonl": + return collector_class( + sample_interval_usec, skip_idle=skip_idle, mode=mode + ) + return collector_class(sample_interval_usec, skip_idle=skip_idle) @@ -978,7 +995,7 @@ def _handle_attach(args): # Create the appropriate collector collector = _create_collector( - args.format, args.sample_interval_usec, skip_idle, args.opcodes, + args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode, output_file=output_file, compression=getattr(args, 'compression', 'auto'), diff_baseline=args.diff_baseline @@ -1057,7 +1074,7 @@ def _handle_run(args): # Create the appropriate collector collector = _create_collector( - args.format, args.sample_interval_usec, skip_idle, args.opcodes, + args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode, output_file=output_file, compression=getattr(args, 'compression', 'auto'), diff_baseline=args.diff_baseline diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 7dc095c6c279bd..dc6eb751b99e15 100644 --- 
a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -20,13 +20,16 @@ def normalize_location(location): """Normalize location to a 4-tuple format. Args: - location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None + location: tuple (lineno, end_lineno, col_offset, end_col_offset), + an integer line number, or None Returns: tuple: (lineno, end_lineno, col_offset, end_col_offset) """ if location is None: return DEFAULT_LOCATION + if isinstance(location, int): + return (location, location, -1, -1) return location diff --git a/Lib/profiling/sampling/jsonl_collector.py b/Lib/profiling/sampling/jsonl_collector.py new file mode 100644 index 00000000000000..187c4175da6816 --- /dev/null +++ b/Lib/profiling/sampling/jsonl_collector.py @@ -0,0 +1,200 @@ +"""JSONL collector.""" + +from collections import Counter +import json +import uuid +from itertools import batched + +from .constants import ( + PROFILING_MODE_ALL, + PROFILING_MODE_CPU, + PROFILING_MODE_EXCEPTION, + PROFILING_MODE_GIL, + PROFILING_MODE_WALL, +) +from .collector import normalize_location +from .stack_collector import StackTraceCollector + + +_CHUNK_SIZE = 256 + +_MODE_NAMES = { + PROFILING_MODE_WALL: "wall", + PROFILING_MODE_CPU: "cpu", + PROFILING_MODE_GIL: "gil", + PROFILING_MODE_ALL: "all", + PROFILING_MODE_EXCEPTION: "exception", +} + + +class JsonlCollector(StackTraceCollector): + """Collector that exports finalized profiling data as JSONL.""" + + def __init__(self, sample_interval_usec, *, skip_idle=False, mode=None): + super().__init__(sample_interval_usec, skip_idle=skip_idle) + self.run_id = uuid.uuid4().hex + + self._string_to_id = {} + self._strings = [] + + self._frame_to_id = {} + self._frames = [] + + self._frame_self = Counter() + self._frame_cumulative = Counter() + self._samples_total = 0 + self._seen_frame_ids = set() + + self._mode = mode + + def process_frames(self, frames, _thread_id, weight=1): + self._samples_total += weight + 
self._seen_frame_ids.clear() + + for i, (filename, location, funcname, _opcode) in enumerate(frames): + frame_id = self._get_or_create_frame_id( + filename, location, funcname + ) + is_leaf = i == 0 + count_cumulative = frame_id not in self._seen_frame_ids + + if count_cumulative: + self._seen_frame_ids.add(frame_id) + + if is_leaf: + self._frame_self[frame_id] += weight + + if count_cumulative: + self._frame_cumulative[frame_id] += weight + + def export(self, filename): + with open(filename, "w", encoding="utf-8") as output: + self._write_message(output, self._build_meta_record()) + self._write_chunked_records( + output, + {"type": "str_def", "v": 1, "run_id": self.run_id}, + "defs", + self._strings, + ) + self._write_chunked_records( + output, + {"type": "frame_def", "v": 1, "run_id": self.run_id}, + "defs", + self._frames, + ) + self._write_chunked_records( + output, + { + "type": "agg", + "v": 1, + "run_id": self.run_id, + "kind": "frame", + "scope": "final", + "samples_total": self._samples_total, + }, + "entries", + self._iter_final_agg_entries(), + ) + self._write_message(output, self._build_end_record()) + + def _build_meta_record(self): + record = { + "type": "meta", + "v": 1, + "run_id": self.run_id, + "sample_interval_usec": self.sample_interval_usec, + } + + if self._mode is not None: + record["mode"] = _MODE_NAMES.get(self._mode, str(self._mode)) + + return record + + def _build_end_record(self): + record = { + "type": "end", + "v": 1, + "run_id": self.run_id, + "samples_total": self._samples_total, + } + + return record + + def _iter_final_agg_entries(self): + for frame_record in self._frames: + frame_id = frame_record["frame_id"] + yield { + "frame_id": frame_id, + "self": self._frame_self[frame_id], + "cumulative": self._frame_cumulative[frame_id], + } + + def _get_or_create_frame_id(self, filename, location, funcname): + synthetic = location is None + location_fields = self._location_to_export_fields(location) + func_str_id = 
self._intern_string(funcname) + path_str_id = self._intern_string(filename) + + frame_key = ( + path_str_id, + func_str_id, + location_fields["line"], + location_fields.get("end_line"), + location_fields.get("col"), + location_fields.get("end_col"), + synthetic, + ) + + if (frame_id := self._frame_to_id.get(frame_key)) is not None: + return frame_id + + frame_id = len(self._frames) + 1 + frame_record = { + "frame_id": frame_id, + "path_str_id": path_str_id, + "func_str_id": func_str_id, + **location_fields, + } + if synthetic: + frame_record["synthetic"] = True + + self._frame_to_id[frame_key] = frame_id + self._frames.append(frame_record) + return frame_id + + def _intern_string(self, value): + value = str(value) + + if (string_id := self._string_to_id.get(value)) is not None: + return string_id + + string_id = len(self._strings) + 1 + self._string_to_id[value] = string_id + self._strings.append({"str_id": string_id, "value": value}) + return string_id + + @staticmethod + def _location_to_export_fields(location): + lineno, end_lineno, col_offset, end_col_offset = normalize_location( + location + ) + + fields = {"line": lineno} + if end_lineno > 0: + fields["end_line"] = end_lineno + if col_offset >= 0: + fields["col"] = col_offset + if end_col_offset >= 0: + fields["end_col"] = end_col_offset + return fields + + def _write_chunked_records( + self, output, base_record, chunk_field, entries + ): + for chunk in batched(entries, _CHUNK_SIZE): + self._write_message(output, {**base_record, chunk_field: chunk}) + + @staticmethod + def _write_message(output, record): + output.write(json.dumps(record, separators=(",", ":"))) + output.write("\n") diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py index 86fb9d4c05b3bc..f96304b1f3443a 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py +++ 
b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -16,6 +16,7 @@ CollapsedStackCollector, FlamegraphCollector, ) + from profiling.sampling.jsonl_collector import JsonlCollector from profiling.sampling.gecko_collector import GeckoCollector from profiling.sampling.collector import extract_lineno, normalize_location from profiling.sampling.opcode_utils import get_opcode_info, format_opcode @@ -57,6 +58,25 @@ def find_child_by_name(children, strings, substr): return None +def _jsonl_tables(records): + meta = next(record for record in records if record["type"] == "meta") + end = next(record for record in records if record["type"] == "end") + agg = next(record for record in records if record["type"] == "agg") + str_defs = { + item["str_id"]: item["value"] + for record in records + if record["type"] == "str_def" + for item in record["defs"] + } + frame_defs = [ + item + for record in records + if record["type"] == "frame_def" + for item in record["defs"] + ] + return meta, str_defs, frame_defs, agg, end + + class TestSampleProfilerComponents(unittest.TestCase): """Unit tests for individual profiler components.""" @@ -1665,6 +1685,276 @@ def test_diff_flamegraph_load_baseline(self): self.assertAlmostEqual(cold_node["diff"], -1.0) self.assertAlmostEqual(cold_node["diff_pct"], -50.0) + def test_jsonl_collector_export_exact_output(self): + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = JsonlCollector(1000) + collector.run_id = "run-123" + + test_frames1 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ], + ) + ] + test_frames2 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ], + ) + ] # Same stack + test_frames3 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, 
[MockFrameInfo("other.py", 5, "other_func")] + ) + ], + ) + ] + + collector.collect(test_frames1) + collector.collect(test_frames2) + collector.collect(test_frames3) + + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + content = f.read() + + self.assertEqual( + content, + ( + '{"type":"meta","v":1,"run_id":"run-123","sample_interval_usec":1000}\n' + '{"type":"str_def","v":1,"run_id":"run-123","defs":[{"str_id":1,"value":"func1"},{"str_id":2,"value":"file.py"},{"str_id":3,"value":"func2"},{"str_id":4,"value":"other_func"},{"str_id":5,"value":"other.py"}]}\n' + '{"type":"frame_def","v":1,"run_id":"run-123","defs":[{"frame_id":1,"path_str_id":2,"func_str_id":1,"line":10,"end_line":10},{"frame_id":2,"path_str_id":2,"func_str_id":3,"line":20,"end_line":20},{"frame_id":3,"path_str_id":5,"func_str_id":4,"line":5,"end_line":5}]}\n' + '{"type":"agg","v":1,"run_id":"run-123","kind":"frame","scope":"final","samples_total":3,"entries":[{"frame_id":1,"self":2,"cumulative":2},{"frame_id":2,"self":0,"cumulative":2},{"frame_id":3,"self":1,"cumulative":1}]}\n' + '{"type":"end","v":1,"run_id":"run-123","samples_total":3}\n' + ), + ) + + def test_jsonl_collector_export_includes_mode_in_meta(self): + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = JsonlCollector(1000, mode=PROFILING_MODE_CPU) + collector.collect( + [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [MockFrameInfo("file.py", 10, "func")] + ) + ], + ) + ] + ) + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + meta_record = next( + record for record in records if record["type"] == "meta" + ) + self.assertEqual(meta_record["mode"], "cpu") + + def test_jsonl_collector_export_empty_profile(self): + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = 
JsonlCollector(1000) + collector.run_id = "run-123" + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + self.assertEqual( + [record["type"] for record in records], ["meta", "end"] + ) + self.assertEqual(records[0]["sample_interval_usec"], 1000) + self.assertEqual(records[0]["run_id"], "run-123") + self.assertEqual(records[1]["samples_total"], 0) + self.assertEqual(records[1]["run_id"], "run-123") + + def test_jsonl_collector_recursive_frames_counted_once_per_sample(self): + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = JsonlCollector(1000) + collector.collect( + [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo( + "recursive.py", 10, "recursive_func" + ), + MockFrameInfo( + "recursive.py", 10, "recursive_func" + ), + MockFrameInfo( + "recursive.py", 10, "recursive_func" + ), + ], + ) + ], + ) + ] + ) + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + _, _, frame_defs, agg_record, end_record = _jsonl_tables(records) + self.assertEqual(len(frame_defs), 1) + self.assertEqual( + agg_record["entries"], + [ + { + "frame_id": frame_defs[0]["frame_id"], + "self": 1, + "cumulative": 1, + } + ], + ) + self.assertEqual(agg_record["samples_total"], 1) + self.assertEqual(end_record["samples_total"], 1) + + def test_jsonl_collector_skip_idle_filters_threads(self): + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + active_status = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU + frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [MockFrameInfo("active1.py", 10, "active_func1")], + status=active_status, + ), + MockThreadInfo( + 2, + [MockFrameInfo("idle.py", 20, "idle_func")], + status=0, + ), + MockThreadInfo( + 3, + 
[MockFrameInfo("active2.py", 30, "active_func2")], + status=active_status, + ), + ], + ) + ] + + def export_summary(skip_idle): + collector = JsonlCollector(1000, skip_idle=skip_idle) + collector.collect(frames) + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + _, str_defs, frame_defs, agg_record, _ = _jsonl_tables(records) + paths = {str_defs[item["path_str_id"]] for item in frame_defs} + funcs = {str_defs[item["func_str_id"]] for item in frame_defs} + return paths, funcs, agg_record["samples_total"] + + paths, funcs, samples_total = export_summary(skip_idle=True) + self.assertEqual(paths, {"active1.py", "active2.py"}) + self.assertEqual(funcs, {"active_func1", "active_func2"}) + self.assertEqual(samples_total, 2) + + paths, funcs, samples_total = export_summary(skip_idle=False) + self.assertEqual(paths, {"active1.py", "idle.py", "active2.py"}) + self.assertEqual(funcs, {"active_func1", "idle_func", "active_func2"}) + self.assertEqual(samples_total, 3) + + def test_jsonl_collector_splits_large_exports_into_chunks(self): + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = JsonlCollector(1000) + + for i in range(257): + collector.collect( + [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo( + f"file{i}.py", i + 1, f"func{i}" + ) + ], + ) + ], + ) + ] + ) + + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + run_ids = {record["run_id"] for record in records} + self.assertEqual(len(run_ids), 1) + self.assertRegex(next(iter(run_ids)), r"^[0-9a-f]{32}$") + + _, str_defs, frame_defs, agg_record, end_record = _jsonl_tables( + records + ) + str_chunks = [ + record for record in records if record["type"] == "str_def" + ] + frame_chunks = [ + record for record in records if record["type"] == 
"frame_def" + ] + agg_chunks = [record for record in records if record["type"] == "agg"] + + self.assertEqual( + [len(record["defs"]) for record in str_chunks], [256, 256, 2] + ) + self.assertEqual( + [len(record["defs"]) for record in frame_chunks], [256, 1] + ) + self.assertEqual( + [len(record["entries"]) for record in agg_chunks], [256, 1] + ) + self.assertEqual(len(str_defs), 514) + self.assertEqual(len(frame_defs), 257) + self.assertEqual(agg_record["samples_total"], 257) + self.assertEqual(end_record["samples_total"], 257) + class TestRecursiveFunctionHandling(unittest.TestCase): """Tests for correct handling of recursive functions in cumulative stats.""" @@ -1874,6 +2164,11 @@ def test_extract_lineno_from_none(self): """Test extracting lineno from None (synthetic frames).""" self.assertEqual(extract_lineno(None), 0) + def test_normalize_location_with_int(self): + """Test normalize_location expands a legacy integer line number.""" + result = normalize_location(42) + self.assertEqual(result, (42, 42, -1, -1)) + def test_normalize_location_with_location_info(self): """Test normalize_location passes through LocationInfo.""" loc = LocationInfo(10, 15, 0, 5) @@ -2064,6 +2359,86 @@ def test_gecko_collector_with_location_info(self): # Verify function name is in string table self.assertIn("handle_request", string_array) + def test_jsonl_collector_with_location_info(self): + """Test JsonlCollector handles LocationInfo properly.""" + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = JsonlCollector(sample_interval_usec=1000) + + # Frame with LocationInfo + frame = MockFrameInfo("test.py", 42, "my_function") + frames = [ + MockInterpreterInfo( + 0, [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + meta, str_defs, 
frame_defs, agg, end = _jsonl_tables(records) + self.assertEqual(meta["sample_interval_usec"], 1000) + self.assertEqual(agg["samples_total"], 1) + self.assertEqual(end["samples_total"], 1) + self.assertEqual(len(frame_defs), 1) + self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "test.py") + self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "my_function") + self.assertEqual( + frame_defs[0], + { + "frame_id": 1, + "path_str_id": frame_defs[0]["path_str_id"], + "func_str_id": frame_defs[0]["func_str_id"], + "line": 42, + "end_line": 42, + }, + ) + + def test_jsonl_collector_with_none_location(self): + """Test JsonlCollector handles None location (synthetic frames).""" + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + collector = JsonlCollector(sample_interval_usec=1000) + + # Create frame with None location (like GC frame) + frame = MockFrameInfo("~", 0, "") + frame.location = None # Synthetic frame has no location + frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(1, [frame], status=THREAD_STATUS_HAS_GIL)] + ) + ] + collector.collect(frames) + + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + meta, str_defs, frame_defs, agg, end = _jsonl_tables(records) + self.assertEqual(meta["sample_interval_usec"], 1000) + self.assertEqual(agg["samples_total"], 1) + self.assertEqual(end["samples_total"], 1) + self.assertEqual(len(frame_defs), 1) + self.assertEqual(str_defs[frame_defs[0]["path_str_id"]], "~") + self.assertEqual(str_defs[frame_defs[0]["func_str_id"]], "") + self.assertEqual( + frame_defs[0], + { + "frame_id": 1, + "path_str_id": frame_defs[0]["path_str_id"], + "func_str_id": frame_defs[0]["func_str_id"], + "line": 0, + "synthetic": True, + }, + ) + class TestOpcodeHandling(unittest.TestCase): """Tests for opcode field handling in collectors.""" @@ -2284,6 +2659,28 @@ def 
test_gecko_collector_frame_format(self): # Should have recorded 3 functions self.assertEqual(thread["funcTable"]["length"], 3) + def test_jsonl_collector_frame_format(self): + """Test JsonlCollector with 4-element frame format.""" + collector = JsonlCollector(sample_interval_usec=1000) + collector.collect(self._make_sample_frames()) + + with tempfile.NamedTemporaryFile(delete=False) as f: + self.addCleanup(close_and_unlink, f) + collector.export(f.name) + + with open(f.name, "r", encoding="utf-8") as fp: + records = [json.loads(line) for line in fp] + + _, str_defs, frame_defs, _, _ = _jsonl_tables(records) + + self.assertEqual(len(frame_defs), 3) + + paths = {str_defs[item["path_str_id"]] for item in frame_defs} + funcs = {str_defs[item["func_str_id"]] for item in frame_defs} + + self.assertEqual(paths, {"app.py", "utils.py", "lib.py"}) + self.assertEqual(funcs, {"main", "helper", "process"}) + class TestInternalFrameFiltering(unittest.TestCase): """Tests for filtering internal profiler frames from output.""" @@ -2411,3 +2808,42 @@ def test_collapsed_stack_collector_filters_internal_frames(self): for (call_tree, _), _ in collector.stack_counter.items(): for filename, _, _ in call_tree: self.assertNotIn("_sync_coordinator", filename) + + def test_jsonl_collector_filters_internal_frames(self): + """Test that JsonlCollector filters out internal frames.""" + jsonl_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, jsonl_out) + + frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("app.py", 50, "run"), + MockFrameInfo("/lib/_sync_coordinator.py", 100, "main"), + MockFrameInfo("<frozen runpy>", 87, "_run_code"), + ], + status=THREAD_STATUS_HAS_GIL, + ) + ], + ) + ] + + collector = JsonlCollector(sample_interval_usec=1000) + collector.collect(frames) + collector.export(jsonl_out.name) + + with open(jsonl_out.name, "r", encoding="utf-8") as f: + records = [json.loads(line) for line in f] + + _, str_defs, 
frame_defs, _, _ = _jsonl_tables(records) + + paths = {str_defs[item["path_str_id"]] for item in frame_defs} + + self.assertIn("app.py", paths) + self.assertIn("<frozen runpy>", paths) + + for path in paths: + self.assertNotIn("_sync_coordinator", path) diff --git a/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst new file mode 100644 index 00000000000000..d270cc14288d8a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-31-17-33-10.gh-issue-146256.Nm_Ke_.rst @@ -0,0 +1,3 @@ +The ``profiling.sampling`` module now supports JSONL output format via +``--jsonl``. Each run emits newline-delimited JSON records suitable for +streaming or agents.