@@ -11,10 +11,8 @@ import (
1111 "sync"
1212
1313 "github.com/docker/docker-agent/pkg/chat"
14- "github.com/docker/docker-agent/pkg/config"
1514 "github.com/docker/docker-agent/pkg/config/latest"
1615 "github.com/docker/docker-agent/pkg/model/provider"
17- "github.com/docker/docker-agent/pkg/model/provider/options"
1816)
1917
2018// relevancePrompt is the prompt template for the judge model to evaluate responses.
@@ -58,26 +56,17 @@ var judgeResponseSchema = &latest.StructuredOutput{
5856// Judge runs LLM-as-a-judge relevance checks concurrently.
5957type Judge struct {
6058 model provider.Provider
61- runConfig * config.RuntimeConfig
6259 concurrency int
63-
64- // judgeWithSchema is a provider pre-configured with structured output.
65- // Created lazily on first use and reused across all relevance checks.
66- // Protected by judgeWithSchemaMu; only cached on success so that
67- // transient errors (e.g. context cancellation) can be retried.
68- judgeWithSchema provider.Provider
69- judgeWithSchemaMu sync.Mutex
7060}
7161
7262// NewJudge creates a new Judge that runs relevance checks with the given concurrency.
7363// Concurrency defaults to 1 if n < 1.
74- func NewJudge (model provider.Provider , runConfig * config. RuntimeConfig , concurrency int ) * Judge {
64+ func NewJudge (model provider.Provider , concurrency int ) * Judge {
7565 if concurrency < 1 {
7666 concurrency = 1
7767 }
7868 return & Judge {
7969 model : model ,
80- runConfig : runConfig ,
8170 concurrency : concurrency ,
8271 }
8372}
@@ -180,48 +169,13 @@ func (j *Judge) CheckRelevance(ctx context.Context, response string, criteria []
180169 return passed , failed , nil
181170}
182171
183- // getOrCreateJudgeWithSchema returns a provider pre-configured with structured output.
184- // The provider is created once and reused across all relevance checks.
185- // Unlike sync.Once, transient failures (e.g. context cancellation) are not
186- // cached, allowing subsequent calls to retry.
187- func (j * Judge ) getOrCreateJudgeWithSchema (ctx context.Context ) (provider.Provider , error ) {
188- j .judgeWithSchemaMu .Lock ()
189- defer j .judgeWithSchemaMu .Unlock ()
190-
191- if j .judgeWithSchema != nil {
192- return j .judgeWithSchema , nil
193- }
194-
195- opts := []options.Opt {
196- options .WithStructuredOutput (judgeResponseSchema ),
197- options .WithThinking (false ),
198- }
199- if j .runConfig .ModelsGateway != "" {
200- opts = append (opts , options .WithGateway (j .runConfig .ModelsGateway ))
201- }
202-
203- modelCfg := j .model .BaseConfig ().ModelConfig
204- p , err := provider .New (ctx , & modelCfg , j .runConfig .EnvProvider (), opts ... )
205- if err != nil {
206- return nil , err
207- }
208-
209- j .judgeWithSchema = p
210- return j .judgeWithSchema , nil
211- }
212-
213172// checkSingle checks a single relevance criterion against the response.
214173// It returns whether the check passed, the reason provided by the judge, and any error.
215174func (j * Judge ) checkSingle (ctx context.Context , response , criterion string ) (passed bool , reason string , err error ) {
216- judgeWithSchema , err := j .getOrCreateJudgeWithSchema (ctx )
217- if err != nil {
218- return false , "" , fmt .Errorf ("creating judge provider with structured output: %w" , err )
219- }
220-
221175 prompt := fmt .Sprintf (relevancePrompt , response , criterion )
222176 messages := []chat.Message {{Role : chat .MessageRoleUser , Content : prompt }}
223177
224- stream , err := judgeWithSchema .CreateChatCompletionStream (ctx , messages , nil )
178+ stream , err := j . model .CreateChatCompletionStream (ctx , messages , nil )
225179 if err != nil {
226180 return false , "" , fmt .Errorf ("creating chat completion: %w" , err )
227181 }
0 commit comments