Skip to content

Commit 644c749

Browse files
committed
Support different number of tool calls
Signed-off-by: David Gageot <david.gageot@docker.com>
1 parent 54b26e9 commit 644c749

2 files changed

Lines changed: 4 additions & 3 deletions

File tree

cmd/root/eval.go

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@ func (f *evalFlags) runEvalCommand(cmd *cobra.Command, args []string) error {
 56  56   		out.Printf("Eval file: %s\n", result.EvalFile)
 57  57   		out.Printf("Tool trajectory score: %f\n", result.Score.ToolTrajectoryScore)
 58  58   		out.Printf("Rouge-1 score: %f\n", result.Score.Rouge1Score)
 59     -
 60  59   	})
 61  60   	return err
 62  61   }

pkg/evaluation/score.go

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,18 +67,20 @@ func rouge1(expected, actual string) float64 {
 67  67   }
 68  68
 69  69   func toolTrajectoryScore(expectedToolMessages, actualToolMessages []session.Message) float64 {
     70 + 	maximum := 0.0
 70  71   	score := 0.0
 71  72
 72     - 	for i := range expectedToolMessages {
     73 + 	for i := range min(len(expectedToolMessages), len(actualToolMessages)) {
 73  74   		expected := expectedToolMessages[i]
 74  75   		actual := actualToolMessages[i]
 75  76
 76  77   		for j := range actual.Message.ToolCalls {
     78 + 			maximum += 1.0
 77  79   			if actual.Message.ToolCalls[j].Function.Name == expected.Message.ToolCalls[j].Function.Name {
 78  80   				score += 1.0
 79  81   			}
 80  82   		}
 81  83   	}
 82  84
 83     - 	return score / float64(len(expectedToolMessages))
     85 + 	return score / maximum
 84  86   }

0 commit comments

Comments
 (0)