Skip to content

Commit e12eb14

Browse files
fix: address review comments
1 parent 4f7cee0 commit e12eb14

5 files changed

Lines changed: 77 additions & 37 deletions

File tree

src/askui/agent_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from typing_extensions import Self
1111

1212
from askui.agent_settings import AgentSettings
13-
from askui.callbacks import ConversationCallback, UsageTrackingCallback
13+
from askui.callbacks import ConversationCallback, ConversationStatisticsCallback
1414
from askui.container import telemetry
1515
from askui.locators.locators import Locator
1616
from askui.models.shared.agent_message_param import MessageParam
@@ -78,7 +78,7 @@ def __init__(
7878
speakers = Speakers()
7979
_callbacks = list(callbacks or [])
8080
_callbacks.append(
81-
UsageTrackingCallback(
81+
ConversationStatisticsCallback(
8282
reporter=self._reporter,
8383
pricing=self._vlm_provider.pricing,
8484
)

src/askui/callbacks/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .conversation_callback import ConversationCallback
2-
from .usage_tracking_callback import UsageTrackingCallback
2+
from .conversation_statistics_callback import ConversationStatisticsCallback
33

44
__all__ = [
55
"ConversationCallback",
6-
"UsageTrackingCallback",
6+
"ConversationStatisticsCallback",
77
]

src/askui/callbacks/usage_tracking_callback.py renamed to src/askui/callbacks/conversation_statistics_callback.py

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1-
"""Callback for tracking token usage and reporting usage summaries."""
1+
"""Callback for tracking per-conversation statistics (token usage, timing).
2+
3+
Emits a `UsageSummary` (with per-conversation and per-step breakdowns,
4+
including start/end timestamps for each conversation) to a reporter when the
5+
conversation ends.
6+
"""
27

38
from __future__ import annotations
49

@@ -180,19 +185,28 @@ class ConversationUsageSummary(UsageSummary):
180185
current agent lifecycle.
181186
conversation_id (str): Unique identifier of the conversation.
182187
step_summaries (list[StepUsageSummary]): Per-step usage summaries.
183-
duration_seconds (float | None): Wall-clock duration of the conversation
184-
in seconds, measured between `on_conversation_start` and
185-
`on_conversation_end`. `None` if duration was not tracked.
188+
started_at (datetime | None): UTC timestamp captured at
189+
`on_conversation_start`. `None` if timing was not tracked.
190+
ended_at (datetime | None): UTC timestamp captured at
191+
`on_conversation_end`. `None` if timing was not tracked.
186192
"""
187193

188194
conversation_index: int
189195
conversation_id: str
190196
step_summaries: list[StepUsageSummary] = Field(default_factory=list)
191-
duration_seconds: float | None = None
197+
started_at: datetime | None = None
198+
ended_at: datetime | None = None
199+
192200

201+
class ConversationStatisticsCallback(ConversationCallback):
202+
"""Tracks per-conversation statistics (token usage per step and wall-clock
203+
timing) and reports a summary at conversation end.
193204
194-
class UsageTrackingCallback(ConversationCallback):
195-
"""Tracks token usage per step and reports a summary at conversation end.
205+
The reported `UsageSummary` contains, for each conversation, the raw
206+
``started_at`` and ``ended_at`` UTC timestamps alongside token usage.
207+
Downstream consumers (e.g. `SimpleHtmlReporter`) are responsible for
208+
deriving human-readable durations from those timestamps so the raw values
209+
remain available for other uses.
196210
197211
Args:
198212
reporter: Reporter to write the final usage summary to.
@@ -211,14 +225,14 @@ def __init__(
211225
self._per_conversation_summaries: list[ConversationUsageSummary] = []
212226
self._per_step_summaries: list[StepUsageSummary] = []
213227
self._conversation_index: int = 0
214-
self._conversation_start_time: datetime | None = None
228+
self._conversation_started_at: datetime | None = None
215229

216230
@override
217231
def on_conversation_start(self, conversation: Conversation) -> None:
218232
self._per_conversation_usage = UsageSummary.create_from(self._summary)
219233
self._per_step_summaries = []
220234
self._conversation_index += 1
221-
self._conversation_start_time = datetime.now(tz=timezone.utc)
235+
self._conversation_started_at = datetime.now(tz=timezone.utc)
222236

223237
@override
224238
def on_step_end(
@@ -251,15 +265,12 @@ def on_conversation_end(self, conversation: Conversation) -> None:
251265
generated_steps: list[StepUsageSummary] = [
252266
step_summary.generate() for step_summary in self._per_step_summaries
253267
]
254-
duration_seconds: float | None = None
255-
if self._conversation_start_time is not None:
256-
duration_seconds = (
257-
datetime.now(tz=timezone.utc) - self._conversation_start_time
258-
).total_seconds()
268+
ended_at = datetime.now(tz=timezone.utc)
259269
conversation_summary = self._create_conversation_summary(
260270
conversation=conversation,
261271
generated_step_summaries=generated_steps,
262-
duration_seconds=duration_seconds,
272+
started_at=self._conversation_started_at,
273+
ended_at=ended_at,
263274
)
264275
self._per_conversation_summaries.append(conversation_summary)
265276
self._summary.per_conversation_summaries = list(
@@ -295,13 +306,15 @@ def _create_conversation_summary(
295306
self,
296307
conversation: Conversation,
297308
generated_step_summaries: list[StepUsageSummary],
298-
duration_seconds: float | None = None,
309+
started_at: datetime | None = None,
310+
ended_at: datetime | None = None,
299311
) -> ConversationUsageSummary:
300312
conversation_summary = ConversationUsageSummary(
301313
conversation_index=self._conversation_index,
302314
conversation_id=conversation.conversation_id,
303315
step_summaries=generated_step_summaries,
304-
duration_seconds=duration_seconds,
316+
started_at=started_at,
317+
ended_at=ended_at,
305318
input_tokens=self._per_conversation_usage.input_tokens,
306319
output_tokens=self._per_conversation_usage.output_tokens,
307320
cache_creation_input_tokens=(

src/askui/reporting.py

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
if TYPE_CHECKING:
2222
from PIL import Image
2323

24-
from askui.callbacks.usage_tracking_callback import UsageSummary
24+
from askui.callbacks.conversation_statistics_callback import (
25+
ConversationUsageSummary,
26+
UsageSummary,
27+
)
2528

2629

2730
def normalize_to_pil_images(
@@ -1024,15 +1027,16 @@ def generate(self) -> None:
10241027
</p>
10251028
<div class="usage-breakdown-list">
10261029
{% for conversation_usage in usage_summary.per_conversation_summaries %}
1030+
{% set conversation_duration = format_conversation_duration(conversation_usage) %}
10271031
<details class="usage-breakdown-item">
10281032
<summary>
10291033
<span class="usage-breakdown-title">
10301034
Conversation #{{ conversation_usage.conversation_index }}
10311035
</span>
10321036
<span class="usage-breakdown-meta">
10331037
{{ conversation_usage.step_summaries | length }} step(s),
1034-
{% if conversation_usage.duration_seconds is not none %}
1035-
Duration: {{ format_duration(conversation_usage.duration_seconds) }},
1038+
{% if conversation_duration is not none %}
1039+
Duration: {{ conversation_duration }},
10361040
{% endif %}
10371041
Input {{ "{:,}".format(conversation_usage.input_tokens or 0) }},
10381042
Output {{ "{:,}".format(conversation_usage.output_tokens or 0) }},
@@ -1050,7 +1054,7 @@ def generate(self) -> None:
10501054
<table class="nested-table">
10511055
<tr>
10521056
<th>Conversation ID</th>
1053-
{% if conversation_usage.duration_seconds is not none %}
1057+
{% if conversation_duration is not none %}
10541058
<th>Duration</th>
10551059
{% endif %}
10561060
<th>Input Tokens</th>
@@ -1063,8 +1067,8 @@ def generate(self) -> None:
10631067
</tr>
10641068
<tr class="system">
10651069
<td class="mono">{{ conversation_usage.conversation_id }}</td>
1066-
{% if conversation_usage.duration_seconds is not none %}
1067-
<td>{{ format_duration(conversation_usage.duration_seconds) }}</td>
1070+
{% if conversation_duration is not none %}
1071+
<td>{{ conversation_duration }}</td>
10681072
{% endif %}
10691073
<td>{{ "{:,}".format(conversation_usage.input_tokens or 0) }}</td>
10701074
<td>{{ "{:,}".format(conversation_usage.output_tokens or 0) }}</td>
@@ -1175,14 +1179,33 @@ def generate(self) -> None:
11751179
(end_time - self._start_time).total_seconds()
11761180
)
11771181

1182+
def _format_conversation_duration(
1183+
conversation_usage: "ConversationUsageSummary",
1184+
) -> str | None:
1185+
"""Derive the formatted conversation duration from stored timestamps.
1186+
1187+
Returns ``None`` if either ``started_at`` or ``ended_at`` is missing
1188+
so the template can skip rendering.
1189+
"""
1190+
if (
1191+
conversation_usage.started_at is None
1192+
or conversation_usage.ended_at is None
1193+
):
1194+
return None
1195+
return _format_duration(
1196+
(
1197+
conversation_usage.ended_at - conversation_usage.started_at
1198+
).total_seconds()
1199+
)
1200+
11781201
html = template.render(
11791202
timestamp=end_time,
11801203
messages=self.messages,
11811204
system_info=self.system_info,
11821205
usage_summary=self.usage_summary,
11831206
cache_original_usage=self.cache_original_usage,
11841207
execution_time_formatted=execution_time_formatted,
1185-
format_duration=_format_duration,
1208+
format_conversation_duration=_format_conversation_duration,
11861209
)
11871210

11881211
report_path = (

tests/unit/model_providers/test_model_pricing.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
import pytest
66

7-
from askui.callbacks.usage_tracking_callback import (
7+
from askui.callbacks.conversation_statistics_callback import (
8+
ConversationStatisticsCallback,
89
UsageSummary,
9-
UsageTrackingCallback,
1010
)
1111
from askui.models.shared.agent_message_param import UsageParam
1212
from askui.speaker.speaker import SpeakerResult
@@ -98,12 +98,12 @@ def _assert_close(
9898
assert abs(actual - expected) <= tolerance
9999

100100

101-
class TestUsageTrackingCallbackCost:
101+
class TestConversationStatisticsCallbackCost:
102102
def _make_callback(
103103
self, pricing: ModelPricing | None = None
104-
) -> tuple[UsageTrackingCallback, MagicMock]:
104+
) -> tuple[ConversationStatisticsCallback, MagicMock]:
105105
reporter = MagicMock()
106-
callback = UsageTrackingCallback(reporter=reporter, pricing=pricing)
106+
callback = ConversationStatisticsCallback(reporter=reporter, pricing=pricing)
107107
return callback, reporter
108108

109109
@pytest.mark.parametrize(
@@ -245,8 +245,9 @@ def test_tracks_per_step_per_conversation_and_total_usage(self) -> None:
245245
assert per_conversation_summary.output_tokens == 30
246246
_assert_close(per_conversation_summary.total_cost, 0.0009)
247247
assert len(per_conversation_summary.step_summaries) == 2
248-
assert per_conversation_summary.duration_seconds is not None
249-
assert per_conversation_summary.duration_seconds >= 0.0
248+
assert per_conversation_summary.started_at is not None
249+
assert per_conversation_summary.ended_at is not None
250+
assert per_conversation_summary.ended_at >= per_conversation_summary.started_at
250251

251252
first_step = per_conversation_summary.step_summaries[0]
252253
assert first_step.step_index == 0
@@ -304,8 +305,11 @@ def test_accumulates_multiple_conversations(self) -> None:
304305
assert summary.per_conversation_summaries[0].conversation_id == "conversation-1"
305306
assert summary.per_conversation_summaries[1].conversation_id == "conversation-2"
306307
for per_conversation_summary in summary.per_conversation_summaries:
307-
assert per_conversation_summary.duration_seconds is not None
308-
assert per_conversation_summary.duration_seconds >= 0.0
308+
assert per_conversation_summary.started_at is not None
309+
assert per_conversation_summary.ended_at is not None
310+
assert (
311+
per_conversation_summary.ended_at >= per_conversation_summary.started_at
312+
)
309313

310314
def test_includes_cache_costs_from_provider_pricing(self) -> None:
311315
pricing = ModelPricing(

0 commit comments

Comments
 (0)