Skip to content

Commit 162cf2c

Browse files
authored
Merge pull request #1396 from dgageot/costs
Pass message level costs in --remote mode and `cagent exec --json`
2 parents b849716 + bc08e74 commit 162cf2c

13 files changed

Lines changed: 575 additions & 5331 deletions

File tree

gen/cagent/v1/cagent.pb.go

Lines changed: 0 additions & 4686 deletions
This file was deleted.

gen/cagent/v1/cagentv1connect/cagent.connect.go

Lines changed: 0 additions & 416 deletions
This file was deleted.

gen/proto/cagent/v1/cagent.pb.go

Lines changed: 239 additions & 136 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/connectrpc/server.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,20 @@ func toolCallResultToProto(r *tools.ToolCallResult) *cagentv1.ToolCallResult {
377377
}
378378
}
379379

380+
func messageUsageToProto(m *runtime.MessageUsage) *cagentv1.LastMessageUsage {
381+
if m == nil {
382+
return nil
383+
}
384+
return &cagentv1.LastMessageUsage{
385+
InputTokens: m.InputTokens,
386+
OutputTokens: m.OutputTokens,
387+
CachedInputTokens: m.CachedInputTokens,
388+
CacheWriteTokens: m.CacheWriteTokens,
389+
Cost: m.Cost,
390+
Model: m.Model,
391+
}
392+
}
393+
380394
func runtimeEventToProto(event runtime.Event) *cagentv1.Event {
381395
switch e := event.(type) {
382396
case *runtime.UserMessageEvent:
@@ -505,6 +519,7 @@ func runtimeEventToProto(event runtime.Event) *cagentv1.Event {
505519
ContextLength: e.Usage.ContextLength,
506520
ContextLimit: e.Usage.ContextLimit,
507521
Cost: e.Usage.Cost,
522+
LastMessage: messageUsageToProto(e.Usage.LastMessage),
508523
},
509524
AgentName: e.AgentName,
510525
},

pkg/runtime/connectrpc_client.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
cagentv1 "github.com/docker/cagent/gen/proto/cagent/v1"
1414
"github.com/docker/cagent/gen/proto/cagent/v1/cagentv1connect"
1515
"github.com/docker/cagent/pkg/api"
16+
"github.com/docker/cagent/pkg/chat"
1617
"github.com/docker/cagent/pkg/config/latest"
1718
"github.com/docker/cagent/pkg/session"
1819
"github.com/docker/cagent/pkg/tools"
@@ -345,6 +346,7 @@ func (c *ConnectRPCClient) convertProtoEventToRuntimeEvent(e *cagentv1.Event) Ev
345346
ContextLength: ev.TokenUsage.Usage.ContextLength,
346347
ContextLimit: ev.TokenUsage.Usage.ContextLimit,
347348
Cost: ev.TokenUsage.Usage.Cost,
349+
LastMessage: convertProtoMessageUsage(ev.TokenUsage.Usage.LastMessage),
348350
}
349351
}
350352
return &TokenUsageEvent{
@@ -526,6 +528,22 @@ func convertProtoToolCall(tc *cagentv1.ToolCall) tools.ToolCall {
526528
}
527529
}
528530

531+
func convertProtoMessageUsage(m *cagentv1.LastMessageUsage) *MessageUsage {
532+
if m == nil {
533+
return nil
534+
}
535+
return &MessageUsage{
536+
Usage: chat.Usage{
537+
InputTokens: m.InputTokens,
538+
OutputTokens: m.OutputTokens,
539+
CachedInputTokens: m.CachedInputTokens,
540+
CacheWriteTokens: m.CacheWriteTokens,
541+
},
542+
Cost: m.Cost,
543+
Model: m.Model,
544+
}
545+
}
546+
529547
func convertProtoTool(t *cagentv1.Tool) tools.Tool {
530548
if t == nil {
531549
return tools.Tool{}

pkg/runtime/event.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package runtime
33
import (
44
"cmp"
55

6+
"github.com/docker/cagent/pkg/chat"
67
"github.com/docker/cagent/pkg/tools"
78
)
89

@@ -194,14 +195,27 @@ type TokenUsageEvent struct {
194195
}
195196

196197
type Usage struct {
197-
InputTokens int64 `json:"input_tokens"`
198-
OutputTokens int64 `json:"output_tokens"`
199-
ContextLength int64 `json:"context_length"`
200-
ContextLimit int64 `json:"context_limit"`
201-
Cost float64 `json:"cost"`
198+
InputTokens int64 `json:"input_tokens"`
199+
OutputTokens int64 `json:"output_tokens"`
200+
ContextLength int64 `json:"context_length"`
201+
ContextLimit int64 `json:"context_limit"`
202+
Cost float64 `json:"cost"`
203+
LastMessage *MessageUsage `json:"last_message,omitempty"`
204+
}
205+
206+
// MessageUsage contains per-message usage data to include in TokenUsageEvent.
207+
// It embeds chat.Usage and adds Cost and Model fields.
208+
type MessageUsage struct {
209+
chat.Usage
210+
Cost float64
211+
Model string
202212
}
203213

204214
func TokenUsage(sessionID, agentName string, inputTokens, outputTokens, contextLength, contextLimit int64, cost float64) Event {
215+
return TokenUsageWithMessage(sessionID, agentName, inputTokens, outputTokens, contextLength, contextLimit, cost, nil)
216+
}
217+
218+
func TokenUsageWithMessage(sessionID, agentName string, inputTokens, outputTokens, contextLength, contextLimit int64, cost float64, msgUsage *MessageUsage) Event {
205219
return &TokenUsageEvent{
206220
Type: "token_usage",
207221
SessionID: sessionID,
@@ -211,6 +225,7 @@ func TokenUsage(sessionID, agentName string, inputTokens, outputTokens, contextL
211225
InputTokens: inputTokens,
212226
OutputTokens: outputTokens,
213227
Cost: cost,
228+
LastMessage: msgUsage,
214229
},
215230
AgentContext: AgentContext{AgentName: agentName},
216231
}

pkg/runtime/runtime.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
811811

812812
// Add assistant message to conversation history, but skip empty assistant messages
813813
// Providers reject assistant messages that have neither content nor tool calls.
814+
var msgUsage *MessageUsage
814815
if strings.TrimSpace(res.Content) != "" || len(res.Calls) > 0 {
815816
// Build tool definitions for the tool calls
816817
var toolDefs []tools.Tool
@@ -855,14 +856,23 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
855856
Cost: messageCost,
856857
}
857858

859+
// Build per-message usage for the event
860+
if res.Usage != nil {
861+
msgUsage = &MessageUsage{
862+
Usage: *res.Usage,
863+
Cost: messageCost,
864+
Model: messageModel,
865+
}
866+
}
867+
858868
sess.AddMessage(session.NewAgentMessage(a, &assistantMessage))
859869
r.saveSession(ctx, sess)
860870
slog.Debug("Added assistant message to session", "agent", a.Name(), "total_messages", len(sess.GetAllMessages()))
861871
} else {
862872
slog.Debug("Skipping empty assistant message (no content and no tool calls)", "agent", a.Name())
863873
}
864874

865-
events <- TokenUsage(sess.ID, r.currentAgent, sess.InputTokens, sess.OutputTokens, sess.InputTokens+sess.OutputTokens, contextLimit, sess.Cost)
875+
events <- TokenUsageWithMessage(sess.ID, r.currentAgent, sess.InputTokens, sess.OutputTokens, sess.InputTokens+sess.OutputTokens, contextLimit, sess.Cost, msgUsage)
866876

867877
r.processToolCalls(ctx, sess, res.Calls, agentTools, events)
868878

pkg/runtime/runtime_test.go

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,10 @@ func TestSimple(t *testing.T) {
219219
UserMessage("Hi"),
220220
StreamStarted(sess.ID, "root"),
221221
AgentChoice("root", "Hello"),
222-
TokenUsage(sess.ID, "root", 3, 2, 5, 0, 0),
222+
TokenUsageWithMessage(sess.ID, "root", 3, 2, 5, 0, 0, &MessageUsage{
223+
Usage: chat.Usage{InputTokens: 3, OutputTokens: 2},
224+
Model: "test/mock-model",
225+
}),
223226
StreamStopped(sess.ID, "root"),
224227
}
225228

@@ -251,7 +254,10 @@ func TestMultipleContentChunks(t *testing.T) {
251254
AgentChoice("root", "how "),
252255
AgentChoice("root", "are "),
253256
AgentChoice("root", "you?"),
254-
TokenUsage(sess.ID, "root", 8, 12, 20, 0, 0),
257+
TokenUsageWithMessage(sess.ID, "root", 8, 12, 20, 0, 0, &MessageUsage{
258+
Usage: chat.Usage{InputTokens: 8, OutputTokens: 12},
259+
Model: "test/mock-model",
260+
}),
255261
StreamStopped(sess.ID, "root"),
256262
}
257263

@@ -279,7 +285,10 @@ func TestWithReasoning(t *testing.T) {
279285
AgentChoiceReasoning("root", "Let me think about this..."),
280286
AgentChoiceReasoning("root", " I should respond politely."),
281287
AgentChoice("root", "Hello, how can I help you?"),
282-
TokenUsage(sess.ID, "root", 10, 15, 25, 0, 0),
288+
TokenUsageWithMessage(sess.ID, "root", 10, 15, 25, 0, 0, &MessageUsage{
289+
Usage: chat.Usage{InputTokens: 10, OutputTokens: 15},
290+
Model: "test/mock-model",
291+
}),
283292
StreamStopped(sess.ID, "root"),
284293
}
285294

@@ -309,7 +318,10 @@ func TestMixedContentAndReasoning(t *testing.T) {
309318
AgentChoice("root", "Hello!"),
310319
AgentChoiceReasoning("root", " I should be friendly"),
311320
AgentChoice("root", " How can I help you today?"),
312-
TokenUsage(sess.ID, "root", 15, 20, 35, 0, 0),
321+
TokenUsageWithMessage(sess.ID, "root", 15, 20, 35, 0, 0, &MessageUsage{
322+
Usage: chat.Usage{InputTokens: 15, OutputTokens: 20},
323+
Model: "test/mock-model",
324+
}),
313325
StreamStopped(sess.ID, "root"),
314326
}
315327

pkg/session/session.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,20 @@ type Session struct {
9999
// Sub-sessions are not persisted as standalone entries; they are embedded
100100
// within the parent session's Messages array.
101101
ParentID string `json:"-"`
102+
103+
// MessageUsageHistory stores per-message usage data for remote mode.
104+
// In remote mode, messages are managed server-side, so we track usage separately.
105+
// This is not persisted (json:"-") as it's only needed for the current session display.
106+
MessageUsageHistory []MessageUsageRecord `json:"-"`
107+
}
108+
109+
// MessageUsageRecord stores usage data for a single assistant message.
110+
// Used in remote mode where messages aren't stored in the client-side session.
111+
type MessageUsageRecord struct {
112+
AgentName string `json:"agent_name"`
113+
Model string `json:"model"`
114+
Cost float64 `json:"cost"`
115+
Usage chat.Usage `json:"usage"`
102116
}
103117

104118
// Permission mode constants
@@ -300,6 +314,35 @@ func (s *Session) getLastMessageContentByRole(role chat.MessageRole) string {
300314
return ""
301315
}
302316

317+
// UpdateLastAssistantMessageUsage updates the usage and cost fields of the last assistant message.
318+
// This is used in remote mode to populate per-message cost data from TokenUsageEvent.
319+
func (s *Session) UpdateLastAssistantMessageUsage(usage *chat.Usage, cost float64, model string) {
320+
for i := len(s.Messages) - 1; i >= 0; i-- {
321+
if s.Messages[i].IsMessage() && s.Messages[i].Message.Message.Role == chat.MessageRoleAssistant {
322+
s.Messages[i].Message.Message.Usage = usage
323+
s.Messages[i].Message.Message.Cost = cost
324+
if model != "" {
325+
s.Messages[i].Message.Message.Model = model
326+
}
327+
return
328+
}
329+
}
330+
}
331+
332+
// AddMessageUsageRecord appends a usage record for remote mode where messages aren't stored locally.
333+
// This enables the /cost dialog to show per-message breakdown even when using a remote runtime.
334+
func (s *Session) AddMessageUsageRecord(agentName, model string, cost float64, usage *chat.Usage) {
335+
if usage == nil {
336+
return
337+
}
338+
s.MessageUsageHistory = append(s.MessageUsageHistory, MessageUsageRecord{
339+
AgentName: agentName,
340+
Model: model,
341+
Cost: cost,
342+
Usage: *usage,
343+
})
344+
}
345+
303346
type Opt func(s *Session)
304347

305348
func WithUserMessage(content string) Opt {

pkg/session/session_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,3 +217,104 @@ func TestGetMessages_CacheControlWithSummary(t *testing.T) {
217217
// Verify checkpoint #2 is on date
218218
assert.Contains(t, messages[checkpointIndices[1]].Content, "Today's date", "checkpoint #2 should be on date message")
219219
}
220+
221+
func TestUpdateLastAssistantMessageUsage(t *testing.T) {
222+
testAgent := &agent.Agent{}
223+
224+
s := New()
225+
226+
// Add user message
227+
s.AddMessage(NewAgentMessage(testAgent, &chat.Message{
228+
Role: chat.MessageRoleUser,
229+
Content: "hello",
230+
}))
231+
232+
// Add assistant message without usage
233+
s.AddMessage(NewAgentMessage(testAgent, &chat.Message{
234+
Role: chat.MessageRoleAssistant,
235+
Content: "response",
236+
}))
237+
238+
// Update the last assistant message with usage data
239+
usage := &chat.Usage{
240+
InputTokens: 100,
241+
OutputTokens: 50,
242+
CachedInputTokens: 10,
243+
}
244+
s.UpdateLastAssistantMessageUsage(usage, 0.005, "gpt-4")
245+
246+
// Verify the update
247+
messages := s.GetAllMessages()
248+
assert.Len(t, messages, 2)
249+
250+
lastMsg := messages[1]
251+
assert.Equal(t, chat.MessageRoleAssistant, lastMsg.Message.Role)
252+
assert.NotNil(t, lastMsg.Message.Usage)
253+
assert.Equal(t, int64(100), lastMsg.Message.Usage.InputTokens)
254+
assert.Equal(t, int64(50), lastMsg.Message.Usage.OutputTokens)
255+
assert.Equal(t, int64(10), lastMsg.Message.Usage.CachedInputTokens)
256+
assert.InEpsilon(t, 0.005, lastMsg.Message.Cost, 0.0001)
257+
assert.Equal(t, "gpt-4", lastMsg.Message.Model)
258+
}
259+
260+
func TestUpdateLastAssistantMessageUsage_NoAssistantMessage(t *testing.T) {
261+
testAgent := &agent.Agent{}
262+
263+
s := New()
264+
265+
// Add only user message
266+
s.AddMessage(NewAgentMessage(testAgent, &chat.Message{
267+
Role: chat.MessageRoleUser,
268+
Content: "hello",
269+
}))
270+
271+
// Should not panic when no assistant message exists
272+
usage := &chat.Usage{InputTokens: 100}
273+
s.UpdateLastAssistantMessageUsage(usage, 0.01, "model")
274+
275+
// Verify nothing changed
276+
messages := s.GetAllMessages()
277+
assert.Len(t, messages, 1)
278+
assert.Equal(t, chat.MessageRoleUser, messages[0].Message.Role)
279+
}
280+
281+
func TestUpdateLastAssistantMessageUsage_UpdatesOnlyLast(t *testing.T) {
282+
testAgent := &agent.Agent{}
283+
284+
s := New()
285+
286+
// Add multiple assistant messages
287+
s.AddMessage(NewAgentMessage(testAgent, &chat.Message{
288+
Role: chat.MessageRoleAssistant,
289+
Content: "first response",
290+
Usage: &chat.Usage{InputTokens: 10},
291+
}))
292+
293+
s.AddMessage(NewAgentMessage(testAgent, &chat.Message{
294+
Role: chat.MessageRoleUser,
295+
Content: "follow up",
296+
}))
297+
298+
s.AddMessage(NewAgentMessage(testAgent, &chat.Message{
299+
Role: chat.MessageRoleAssistant,
300+
Content: "second response",
301+
}))
302+
303+
// Update usage - should only affect the last assistant message
304+
usage := &chat.Usage{InputTokens: 200}
305+
s.UpdateLastAssistantMessageUsage(usage, 0.02, "new-model")
306+
307+
// Verify only the last assistant message was updated
308+
messages := s.GetAllMessages()
309+
assert.Len(t, messages, 3)
310+
311+
// First assistant message should keep original usage
312+
assert.NotNil(t, messages[0].Message.Usage)
313+
assert.Equal(t, int64(10), messages[0].Message.Usage.InputTokens)
314+
315+
// Last assistant message should have new usage
316+
assert.NotNil(t, messages[2].Message.Usage)
317+
assert.Equal(t, int64(200), messages[2].Message.Usage.InputTokens)
318+
assert.InEpsilon(t, 0.02, messages[2].Message.Cost, 0.0001)
319+
assert.Equal(t, "new-model", messages[2].Message.Model)
320+
}

0 commit comments

Comments
 (0)