Skip to content

Commit dc8c89f

Browse files
Merge pull request #50 from izzet/feature/console-additional-metrics-output
Add view-scoped additional metrics and console reporting
2 parents e6ffdf1 + 4ce764c commit dc8c89f

4 files changed

Lines changed: 77 additions & 5 deletions

File tree

python/dftracer/analyzer/analyzer.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,10 @@ def _analyze_hlm(
951951
_hlms=hlms,
952952
_main_views=main_views,
953953
_metric_boundaries=metric_boundaries,
954+
additional_metrics={
955+
view_type: list(metrics.keys())
956+
for view_type, metrics in (self.preset.additional_metrics or {}).items()
957+
}, # type: ignore
954958
checkpoint_dir=self.checkpoint_dir,
955959
flat_views=flat_views,
956960
layers=self.layers,
@@ -1112,16 +1116,22 @@ def _process_flat_view(
11121116
time_boundary_layer=self.get_time_boundary_layer(),
11131117
)
11141118
with log_block("set_additional_metrics", view_key=view_key):
1115-
flat_view = self._set_additional_metrics(flat_view, is_view_process_based=is_view_process_based)
1119+
flat_view = self._set_additional_metrics(
1120+
flat_view,
1121+
view_key=view_key,
1122+
)
11161123
return flat_view.sort_index(axis=1)
11171124

11181125
@staticmethod
11191126
def _save_flat_view(view: pd.DataFrame, view_path: str):
11201127
view.to_parquet(f"{view_path}.parquet")
11211128

1122-
def _set_additional_metrics(self, view: pd.DataFrame, is_view_process_based: bool, epsilon=1e-9) -> pd.DataFrame:
1129+
def _set_additional_metrics(self, view: pd.DataFrame, view_key: ViewKey, epsilon=1e-9) -> pd.DataFrame:
1130+
view_type = view_key[-1]
1131+
is_view_process_based = self.is_view_process_based(view_key)
11231132
time_metric = "time_sum" if is_view_process_based else "time_max"
1124-
for metric, eval_condition in self.preset.additional_metrics.items():
1133+
view_additional_metrics = (self.preset.additional_metrics or {}).get(view_type, {})
1134+
for metric, eval_condition in view_additional_metrics.items():
11251135
eval_condition = eval_condition.format(
11261136
epsilon=epsilon,
11271137
time_interval=self.time_granularity,

python/dftracer/analyzer/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
@dc.dataclass
3333
class AnalyzerPresetConfig:
34-
additional_metrics: Optional[Dict[str, Optional[str]]] = dc.field(default_factory=dict)
34+
additional_metrics: Optional[Dict[str, Dict[str, str]]] = dc.field(default_factory=dict)
3535
async_layers: Optional[List[str]] = dc.field(default_factory=list)
3636
derived_metrics: Optional[Dict[str, Dict[str, str]]] = dc.field(default_factory=dict)
3737
layer_defs: Dict[str, Optional[str]] = MISSING

python/dftracer/analyzer/output.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import dask
44
import dataclasses as dc
55
import inflect
6+
import numpy as np
67
import pandas as pd
78
from rich.console import Console
89
from rich.table import Table
910
from typing import Dict, List, Optional
1011

11-
from .constants import COL_PROC_NAME, HUMANIZED_LAYERS, Layer, MiB
12+
from .constants import COL_PROC_NAME, HUMANIZED_LAYERS, GiB, Layer, MiB
1213
from .types import (
1314
AnalyzerResultType,
1415
RawStats,
@@ -155,6 +156,9 @@ def handle_result(self, result: AnalyzerResultType):
155156
summary_table = self._create_summary_table(summary=summary, view_key=view_key)
156157
layer_breakdown_table = self._create_layer_breakdown_table(summary=summary, view_key=view_key)
157158
print_objects.append(summary_table)
159+
additional_metrics_table = self._create_additional_metrics_table(result=result, view_key=view_key)
160+
if additional_metrics_table is not None:
161+
print_objects.append(additional_metrics_table)
158162
print_objects.append(layer_breakdown_table)
159163
console = Console(record=True)
160164
console.print(*print_objects)
@@ -232,6 +236,63 @@ def _create_summary_table(self, summary: OutputSummary, view_key: ViewKey) -> Ta
232236

233237
return summary_table
234238

239+
def _create_additional_metrics_table(self, result: AnalyzerResultType, view_key: ViewKey) -> Optional[Table]:
240+
if not result.additional_metrics:
241+
return None
242+
243+
flat_view = result.flat_views[view_key]
244+
view_type = view_key[-1]
245+
view_additional_metrics = result.additional_metrics.get(view_type, [])
246+
if not view_additional_metrics:
247+
return None
248+
view_name = humanized_view_name(view_key, ' ')
249+
additional_table = Table(title=f"{view_name} Additional Metrics", title_style='bold magenta', expand=True)
250+
additional_table.add_column(header='Metric', style='bold')
251+
additional_table.add_column(header='Unit', style='italic')
252+
additional_table.add_column(header='Non-null', justify='right')
253+
additional_table.add_column(header='Min', justify='right')
254+
additional_table.add_column(header='Mean', justify='right')
255+
additional_table.add_column(header='Max', justify='right')
256+
257+
found_metric = False
258+
for metric in view_additional_metrics:
259+
if metric not in flat_view.columns:
260+
continue
261+
metric_series = pd.to_numeric(flat_view[metric], errors='coerce').replace([np.inf, -np.inf], pd.NA)
262+
scale, unit = self._additional_metric_scale_and_unit(metric)
263+
metric_series = metric_series / scale
264+
non_null = int(metric_series.notna().sum())
265+
if non_null == 0:
266+
additional_table.add_row(metric, unit, "0", "-", "-", "-")
267+
found_metric = True
268+
continue
269+
additional_table.add_row(
270+
metric,
271+
unit,
272+
f"{non_null:,}",
273+
f"{float(metric_series.min()):.3f}",
274+
f"{float(metric_series.mean()):.3f}",
275+
f"{float(metric_series.max()):.3f}",
276+
)
277+
found_metric = True
278+
279+
if not found_metric:
280+
return None
281+
return additional_table
282+
283+
@staticmethod
284+
def _additional_metric_scale_and_unit(metric: str):
285+
metric_lower = metric.lower()
286+
if metric_lower.endswith('_gbps'):
287+
return GiB, 'GB/s'
288+
if metric_lower.endswith('_mbps'):
289+
return MiB, 'MB/s'
290+
if metric_lower.endswith('_gb'):
291+
return GiB, 'GB'
292+
if metric_lower.endswith('_mb'):
293+
return MiB, 'MB'
294+
return 1.0, '-'
295+
235296
def _format_val(self, value: float, fmt_int=False) -> str:
236297
if value is None or value == 0:
237298
return '-'

python/dftracer/analyzer/types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ class OutputType:
192192

193193
@dc.dataclass
194194
class AnalyzerResultType:
195+
additional_metrics: Dict[ViewType, List[str]]
195196
checkpoint_dir: str
196197
flat_views: Dict[ViewKey, pd.DataFrame]
197198
layers: List[Layer]

0 commit comments

Comments
 (0)