Skip to content

Commit 118df07

Browse files
authored
Merge pull request #1516 from dgageot/eval-base-image
Pass a custom base image to cagent eval
2 parents 1bb6e30 + 051576c commit 118df07

6 files changed

Lines changed: 38 additions & 11 deletions

File tree

cmd/root/eval.go

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ type evalFlags struct {
2828
judgeModel string
2929
outputDir string
3030
only []string
31+
baseImage string
3132
}
3233

3334
func newEvalCmd() *cobra.Command {
@@ -46,6 +47,7 @@ func newEvalCmd() *cobra.Command {
4647
cmd.Flags().StringVar(&flags.judgeModel, "judge-model", defaultJudgeModel, "Model to use for relevance checking (format: provider/model)")
4748
cmd.Flags().StringVar(&flags.outputDir, "output", "", "Directory for results and logs (default: <eval-dir>/results)")
4849
cmd.Flags().StringSliceVar(&flags.only, "only", nil, "Only run evaluations with file names matching these patterns (can be specified multiple times)")
50+
cmd.Flags().StringVar(&flags.baseImage, "base-image", "", "Custom base Docker image for running evaluations")
4951

5052
return cmd
5153
}
@@ -137,7 +139,7 @@ func (f *evalFlags) runEvalCommand(cmd *cobra.Command, args []string) error {
137139
}
138140

139141
// Run evaluation
140-
run, evalErr := evaluation.EvaluateWithName(ctx, teeOut, isTTY, ttyFd, runName, agentFilename, evalsDir, &f.runConfig, f.concurrency, judgeModel, f.only)
142+
run, evalErr := evaluation.EvaluateWithName(ctx, teeOut, isTTY, ttyFd, runName, agentFilename, evalsDir, &f.runConfig, f.concurrency, judgeModel, f.only, f.baseImage)
141143
if run == nil {
142144
return evalErr
143145
}
pkg/evaluation/Dockerfile.custom.template

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
# syntax=docker/dockerfile:1
2+
3+
FROM {{.BaseImage}}
4+
COPY --from=docker/cagent:edge /cagent /
5+
WORKDIR /working_dir
6+
ENV TELEMETRY_ENABLED=false
7+
ENV CAGENT_HIDE_TELEMETRY_BANNER=1
8+
ENTRYPOINT ["/cagent", "exec", "--yolo", "--json"]
9+
{{if .CopyWorkingDir}}COPY . ./
10+
{{end}}

pkg/evaluation/Dockerfile.template

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,12 @@ exec "$@"
2323
EOF
2424
RUN chmod +x /run.sh
2525

26-
FROM docker/cagent:edge AS cagent
27-
2826
FROM scratch
2927
COPY --from=dind / /
30-
COPY --from=cagent /cagent /
28+
COPY --from=docker/cagent:edge /cagent /
3129
WORKDIR /working_dir
32-
{{if .CopyWorkingDir}}COPY . ./
33-
{{end}}ENV TELEMETRY_ENABLED=false
30+
ENV TELEMETRY_ENABLED=false
3431
ENV CAGENT_HIDE_TELEMETRY_BANNER=1
3532
ENTRYPOINT ["/run.sh", "/cagent", "exec", "--yolo", "--json"]
33+
{{if .CopyWorkingDir}}COPY . ./
34+
{{end}}

pkg/evaluation/build.go

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,18 @@ var (
1717
//go:embed Dockerfile.template
1818
dockerfileTmpl string
1919

20-
dockerfileTemplate = template.Must(template.New("Dockerfile").Parse(dockerfileTmpl))
20+
//go:embed Dockerfile.custom.template
21+
dockerfileCustomTmpl string
22+
23+
dockerfileTemplate = template.Must(template.New("Dockerfile").Parse(dockerfileTmpl))
24+
dockerfileCustomTemplate = template.Must(template.New("DockerfileCustom").Parse(dockerfileCustomTmpl))
2125
)
2226

2327
func (r *Runner) buildEvalImage(ctx context.Context, workingDir string) (string, error) {
2428
var buildContext string
2529
var data struct {
2630
CopyWorkingDir bool
31+
BaseImage string
2732
}
2833

2934
if workingDir == "" {
@@ -37,8 +42,15 @@ func (r *Runner) buildEvalImage(ctx context.Context, workingDir string) (string,
3742
data.CopyWorkingDir = true
3843
}
3944

45+
// Choose template based on whether a custom base image is provided
46+
tmpl := dockerfileTemplate
47+
if r.baseImage != "" {
48+
tmpl = dockerfileCustomTemplate
49+
data.BaseImage = r.baseImage
50+
}
51+
4052
var dockerfile bytes.Buffer
41-
if err := dockerfileTemplate.Execute(&dockerfile, data); err != nil {
53+
if err := tmpl.Execute(&dockerfile, data); err != nil {
4254
return "", fmt.Errorf("executing dockerfile template: %w", err)
4355
}
4456

pkg/evaluation/eval.go

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@ type Runner struct {
3535
envProvider environment.Provider
3636
ttyFd int
3737
only []string
38+
baseImage string
3839
}
3940

4041
// NewRunner creates a new evaluation runner.
@@ -48,16 +49,17 @@ func NewRunner(agentSource config.Source, runConfig *config.RuntimeConfig, evals
4849
envProvider: runConfig.EnvProvider(),
4950
ttyFd: cfg.TTYFd,
5051
only: cfg.Only,
52+
baseImage: cfg.BaseImage,
5153
}
5254
}
5355

5456
// Evaluate is the main entry point for running evaluations.
55-
func Evaluate(ctx context.Context, out io.Writer, isTTY bool, ttyFd int, agentFilename, evalsDir string, runConfig *config.RuntimeConfig, concurrency int, judgeModel provider.Provider, only []string) (*EvalRun, error) {
56-
return EvaluateWithName(ctx, out, isTTY, ttyFd, GenerateRunName(), agentFilename, evalsDir, runConfig, concurrency, judgeModel, only)
57+
func Evaluate(ctx context.Context, out io.Writer, isTTY bool, ttyFd int, agentFilename, evalsDir string, runConfig *config.RuntimeConfig, concurrency int, judgeModel provider.Provider, only []string, baseImage string) (*EvalRun, error) {
58+
return EvaluateWithName(ctx, out, isTTY, ttyFd, GenerateRunName(), agentFilename, evalsDir, runConfig, concurrency, judgeModel, only, baseImage)
5759
}
5860

5961
// EvaluateWithName runs evaluations with a specified run name.
60-
func EvaluateWithName(ctx context.Context, out io.Writer, isTTY bool, ttyFd int, runName, agentFilename, evalsDir string, runConfig *config.RuntimeConfig, concurrency int, judgeModel provider.Provider, only []string) (*EvalRun, error) {
62+
func EvaluateWithName(ctx context.Context, out io.Writer, isTTY bool, ttyFd int, runName, agentFilename, evalsDir string, runConfig *config.RuntimeConfig, concurrency int, judgeModel provider.Provider, only []string, baseImage string) (*EvalRun, error) {
6163
agentSource, err := config.Resolve(agentFilename)
6264
if err != nil {
6365
return nil, fmt.Errorf("resolving agent: %w", err)
@@ -68,6 +70,7 @@ func EvaluateWithName(ctx context.Context, out io.Writer, isTTY bool, ttyFd int,
6870
JudgeModel: judgeModel,
6971
TTYFd: ttyFd,
7072
Only: only,
73+
BaseImage: baseImage,
7174
})
7275

7376
fmt.Fprintf(out, "Evaluation run: %s\n", runName)

pkg/evaluation/types.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ type Config struct {
114114
Concurrency int // Number of concurrent runs (0 = number of CPUs)
115115
TTYFd int // File descriptor for terminal size queries (e.g., int(os.Stdout.Fd()))
116116
Only []string // Only run evaluations matching these patterns
117+
BaseImage string // Custom base Docker image for running evaluations
117118
}
118119

119120
// Session helper functions

0 commit comments

Comments
 (0)