Skip to content

Commit 76ff93c

Browse files
Merge pull request #266 from askui/feat/conversation_duration
feat: add per-conversation duration to Html reports
2 parents ae5d6c2 + e12eb14 commit 76ff93c

5 files changed

Lines changed: 115 additions & 18 deletions

File tree

src/askui/agent_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from typing_extensions import Self
1111

1212
from askui.agent_settings import AgentSettings
13-
from askui.callbacks import ConversationCallback, UsageTrackingCallback
13+
from askui.callbacks import ConversationCallback, ConversationStatisticsCallback
1414
from askui.container import telemetry
1515
from askui.locators.locators import Locator
1616
from askui.models.shared.agent_message_param import MessageParam
@@ -78,7 +78,7 @@ def __init__(
7878
speakers = Speakers()
7979
_callbacks = list(callbacks or [])
8080
_callbacks.append(
81-
UsageTrackingCallback(
81+
ConversationStatisticsCallback(
8282
reporter=self._reporter,
8383
pricing=self._vlm_provider.pricing,
8484
)

src/askui/callbacks/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .conversation_callback import ConversationCallback
2-
from .usage_tracking_callback import UsageTrackingCallback
2+
from .conversation_statistics_callback import ConversationStatisticsCallback
33

44
__all__ = [
55
"ConversationCallback",
6-
"UsageTrackingCallback",
6+
"ConversationStatisticsCallback",
77
]

src/askui/callbacks/usage_tracking_callback.py renamed to src/askui/callbacks/conversation_statistics_callback.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1-
"""Callback for tracking token usage and reporting usage summaries."""
1+
"""Callback for tracking per-conversation statistics (token usage, timing).
2+
3+
Emits a `UsageSummary` (with per-conversation and per-step breakdowns,
4+
including start/end timestamps for each conversation) to a reporter when the
5+
conversation ends.
6+
"""
27

38
from __future__ import annotations
49

10+
from datetime import datetime, timezone
511
from typing import TYPE_CHECKING
612

713
from opentelemetry import trace
@@ -172,15 +178,35 @@ class StepUsageSummary(UsageSummary):
172178

173179

174180
class ConversationUsageSummary(UsageSummary):
175-
"""Usage summary for one conversation including per-step breakdown."""
181+
"""Usage summary for one conversation including per-step breakdown.
182+
183+
Args:
184+
conversation_index (int): 1-based index of the conversation within the
185+
current agent lifecycle.
186+
conversation_id (str): Unique identifier of the conversation.
187+
step_summaries (list[StepUsageSummary]): Per-step usage summaries.
188+
started_at (datetime | None): UTC timestamp captured at
189+
`on_conversation_start`. `None` if timing was not tracked.
190+
ended_at (datetime | None): UTC timestamp captured at
191+
`on_conversation_end`. `None` if timing was not tracked.
192+
"""
176193

177194
conversation_index: int
178195
conversation_id: str
179196
step_summaries: list[StepUsageSummary] = Field(default_factory=list)
197+
started_at: datetime | None = None
198+
ended_at: datetime | None = None
199+
180200

201+
class ConversationStatisticsCallback(ConversationCallback):
202+
"""Tracks per-conversation statistics (token usage per step and wall-clock
203+
timing) and reports a summary at conversation end.
181204
182-
class UsageTrackingCallback(ConversationCallback):
183-
"""Tracks token usage per step and reports a summary at conversation end.
205+
The reported `UsageSummary` contains, for each conversation, the raw
206+
``started_at`` and ``ended_at`` UTC timestamps alongside token usage.
207+
Downstream consumers (e.g. `SimpleHtmlReporter`) are responsible for
208+
deriving human-readable durations from those timestamps so the raw values
209+
remain available for other uses.
184210
185211
Args:
186212
reporter: Reporter to write the final usage summary to.
@@ -199,12 +225,14 @@ def __init__(
199225
self._per_conversation_summaries: list[ConversationUsageSummary] = []
200226
self._per_step_summaries: list[StepUsageSummary] = []
201227
self._conversation_index: int = 0
228+
self._conversation_started_at: datetime | None = None
202229

203230
@override
204231
def on_conversation_start(self, conversation: Conversation) -> None:
205232
self._per_conversation_usage = UsageSummary.create_from(self._summary)
206233
self._per_step_summaries = []
207234
self._conversation_index += 1
235+
self._conversation_started_at = datetime.now(tz=timezone.utc)
208236

209237
@override
210238
def on_step_end(
@@ -237,9 +265,12 @@ def on_conversation_end(self, conversation: Conversation) -> None:
237265
generated_steps: list[StepUsageSummary] = [
238266
step_summary.generate() for step_summary in self._per_step_summaries
239267
]
268+
ended_at = datetime.now(tz=timezone.utc)
240269
conversation_summary = self._create_conversation_summary(
241270
conversation=conversation,
242271
generated_step_summaries=generated_steps,
272+
started_at=self._conversation_started_at,
273+
ended_at=ended_at,
243274
)
244275
self._per_conversation_summaries.append(conversation_summary)
245276
self._summary.per_conversation_summaries = list(
@@ -275,11 +306,15 @@ def _create_conversation_summary(
275306
self,
276307
conversation: Conversation,
277308
generated_step_summaries: list[StepUsageSummary],
309+
started_at: datetime | None = None,
310+
ended_at: datetime | None = None,
278311
) -> ConversationUsageSummary:
279312
conversation_summary = ConversationUsageSummary(
280313
conversation_index=self._conversation_index,
281314
conversation_id=conversation.conversation_id,
282315
step_summaries=generated_step_summaries,
316+
started_at=started_at,
317+
ended_at=ended_at,
283318
input_tokens=self._per_conversation_usage.input_tokens,
284319
output_tokens=self._per_conversation_usage.output_tokens,
285320
cache_creation_input_tokens=(

src/askui/reporting.py

Lines changed: 58 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,10 @@
2121
if TYPE_CHECKING:
2222
from PIL import Image
2323

24-
from askui.callbacks.usage_tracking_callback import UsageSummary
24+
from askui.callbacks.conversation_statistics_callback import (
25+
ConversationUsageSummary,
26+
UsageSummary,
27+
)
2528

2629

2730
def normalize_to_pil_images(
@@ -37,6 +40,27 @@ def normalize_to_pil_images(
3740
return [image]
3841

3942

43+
def _format_duration(seconds: float) -> str:
44+
"""Format a duration given in seconds as ``HH:MM:SS`` or
45+
``HH:MM:SS.mmm`` when it rounds to a non-zero value under one second.
46+
47+
Used by `SimpleHtmlReporter` to render both the overall execution time and
48+
per-conversation durations consistently.
49+
"""
50+
total_seconds = max(float(seconds), 0.0)
51+
whole_seconds = int(total_seconds)
52+
millis = int(round((total_seconds - whole_seconds) * 1000))
53+
if millis == 1000:
54+
whole_seconds += 1
55+
millis = 0
56+
hours, remainder = divmod(whole_seconds, 3600)
57+
minutes, secs = divmod(remainder, 60)
58+
base = f"{hours:02d}:{minutes:02d}:{secs:02d}"
59+
if whole_seconds == 0 and millis > 0:
60+
return f"{base}.{millis:03d}"
61+
return base
62+
63+
4064
def truncate_base64_images(content: Any) -> Any:
4165
"""Replace base64 image data with a placeholder to keep reports readable.
4266
@@ -1003,13 +1027,17 @@ def generate(self) -> None:
10031027
</p>
10041028
<div class="usage-breakdown-list">
10051029
{% for conversation_usage in usage_summary.per_conversation_summaries %}
1030+
{% set conversation_duration = format_conversation_duration(conversation_usage) %}
10061031
<details class="usage-breakdown-item">
10071032
<summary>
10081033
<span class="usage-breakdown-title">
10091034
Conversation #{{ conversation_usage.conversation_index }}
10101035
</span>
10111036
<span class="usage-breakdown-meta">
10121037
{{ conversation_usage.step_summaries | length }} step(s),
1038+
{% if conversation_duration is not none %}
1039+
Duration: {{ conversation_duration }},
1040+
{% endif %}
10131041
Input {{ "{:,}".format(conversation_usage.input_tokens or 0) }},
10141042
Output {{ "{:,}".format(conversation_usage.output_tokens or 0) }},
10151043
Cache Create {{ "{:,}".format(conversation_usage.cache_creation_input_tokens or 0) }},
@@ -1026,6 +1054,9 @@ def generate(self) -> None:
10261054
<table class="nested-table">
10271055
<tr>
10281056
<th>Conversation ID</th>
1057+
{% if conversation_duration is not none %}
1058+
<th>Duration</th>
1059+
{% endif %}
10291060
<th>Input Tokens</th>
10301061
<th>Output Tokens</th>
10311062
<th>Cache Create</th>
@@ -1036,6 +1067,9 @@ def generate(self) -> None:
10361067
</tr>
10371068
<tr class="system">
10381069
<td class="mono">{{ conversation_usage.conversation_id }}</td>
1070+
{% if conversation_duration is not none %}
1071+
<td>{{ conversation_duration }}</td>
1072+
{% endif %}
10391073
<td>{{ "{:,}".format(conversation_usage.input_tokens or 0) }}</td>
10401074
<td>{{ "{:,}".format(conversation_usage.output_tokens or 0) }}</td>
10411075
<td>{{ "{:,}".format(conversation_usage.cache_creation_input_tokens or 0) }}</td>
@@ -1141,10 +1175,28 @@ def generate(self) -> None:
11411175
end_time = datetime.now(tz=timezone.utc)
11421176
execution_time_formatted: str | None = None
11431177
if self._start_time is not None:
1144-
total_secs = int((end_time - self._start_time).total_seconds())
1145-
hours, remainder = divmod(total_secs, 3600)
1146-
minutes, secs = divmod(remainder, 60)
1147-
execution_time_formatted = f"{hours:02d}:{minutes:02d}:{secs:02d}"
1178+
execution_time_formatted = _format_duration(
1179+
(end_time - self._start_time).total_seconds()
1180+
)
1181+
1182+
def _format_conversation_duration(
1183+
conversation_usage: "ConversationUsageSummary",
1184+
) -> str | None:
1185+
"""Derive the formatted conversation duration from stored timestamps.
1186+
1187+
Returns ``None`` if either ``started_at`` or ``ended_at`` is missing
1188+
so the template can skip rendering.
1189+
"""
1190+
if (
1191+
conversation_usage.started_at is None
1192+
or conversation_usage.ended_at is None
1193+
):
1194+
return None
1195+
return _format_duration(
1196+
(
1197+
conversation_usage.ended_at - conversation_usage.started_at
1198+
).total_seconds()
1199+
)
11481200

11491201
html = template.render(
11501202
timestamp=end_time,
@@ -1153,6 +1205,7 @@ def generate(self) -> None:
11531205
usage_summary=self.usage_summary,
11541206
cache_original_usage=self.cache_original_usage,
11551207
execution_time_formatted=execution_time_formatted,
1208+
format_conversation_duration=_format_conversation_duration,
11561209
)
11571210

11581211
report_path = (

tests/unit/model_providers/test_model_pricing.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
import pytest
66

7-
from askui.callbacks.usage_tracking_callback import (
7+
from askui.callbacks.conversation_statistics_callback import (
8+
ConversationStatisticsCallback,
89
UsageSummary,
9-
UsageTrackingCallback,
1010
)
1111
from askui.models.shared.agent_message_param import UsageParam
1212
from askui.speaker.speaker import SpeakerResult
@@ -98,12 +98,12 @@ def _assert_close(
9898
assert abs(actual - expected) <= tolerance
9999

100100

101-
class TestUsageTrackingCallbackCost:
101+
class TestConversationStatisticsCallbackCost:
102102
def _make_callback(
103103
self, pricing: ModelPricing | None = None
104-
) -> tuple[UsageTrackingCallback, MagicMock]:
104+
) -> tuple[ConversationStatisticsCallback, MagicMock]:
105105
reporter = MagicMock()
106-
callback = UsageTrackingCallback(reporter=reporter, pricing=pricing)
106+
callback = ConversationStatisticsCallback(reporter=reporter, pricing=pricing)
107107
return callback, reporter
108108

109109
@pytest.mark.parametrize(
@@ -245,6 +245,9 @@ def test_tracks_per_step_per_conversation_and_total_usage(self) -> None:
245245
assert per_conversation_summary.output_tokens == 30
246246
_assert_close(per_conversation_summary.total_cost, 0.0009)
247247
assert len(per_conversation_summary.step_summaries) == 2
248+
assert per_conversation_summary.started_at is not None
249+
assert per_conversation_summary.ended_at is not None
250+
assert per_conversation_summary.ended_at >= per_conversation_summary.started_at
248251

249252
first_step = per_conversation_summary.step_summaries[0]
250253
assert first_step.step_index == 0
@@ -301,6 +304,12 @@ def test_accumulates_multiple_conversations(self) -> None:
301304
assert len(summary.per_conversation_summaries) == 2
302305
assert summary.per_conversation_summaries[0].conversation_id == "conversation-1"
303306
assert summary.per_conversation_summaries[1].conversation_id == "conversation-2"
307+
for per_conversation_summary in summary.per_conversation_summaries:
308+
assert per_conversation_summary.started_at is not None
309+
assert per_conversation_summary.ended_at is not None
310+
assert (
311+
per_conversation_summary.ended_at >= per_conversation_summary.started_at
312+
)
304313

305314
def test_includes_cache_costs_from_provider_pricing(self) -> None:
306315
pricing = ModelPricing(

0 commit comments

Comments
 (0)