Skip to content

Commit 40b2bdc

Browse files
authored
Revert "feat(vscode-lm): add image support for VS Code LM API provider" (#11068)
1 parent 49aac7e commit 40b2bdc

7 files changed

Lines changed: 21 additions & 336 deletions

File tree

pnpm-lock.yaml

Lines changed: 2 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/api/providers/__tests__/vscode-lm.spec.ts

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import type { Mock } from "vitest"
2-
import { checkModelSupportsImages, IMAGE_CAPABLE_MODEL_PREFIXES } from "../vscode-lm"
32

43
// Mocks must come first, before imports
54
vi.mock("vscode", () => {
@@ -538,92 +537,3 @@ describe("VsCodeLmHandler", () => {
538537
})
539538
})
540539
})
541-
542-
describe("checkModelSupportsImages", () => {
543-
describe("OpenAI GPT models", () => {
544-
it("should return true for all gpt-* models (GitHub Copilot)", () => {
545-
// All GPT models in GitHub Copilot support images
546-
expect(checkModelSupportsImages("gpt", "gpt-4o")).toBe(true)
547-
expect(checkModelSupportsImages("gpt", "gpt-4.1")).toBe(true)
548-
expect(checkModelSupportsImages("gpt", "gpt-5")).toBe(true)
549-
expect(checkModelSupportsImages("gpt", "gpt-5.1")).toBe(true)
550-
expect(checkModelSupportsImages("gpt", "gpt-5.2")).toBe(true)
551-
expect(checkModelSupportsImages("gpt-mini", "gpt-5-mini")).toBe(true)
552-
expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex")).toBe(true)
553-
expect(checkModelSupportsImages("gpt-codex", "gpt-5.2-codex")).toBe(true)
554-
expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex-max")).toBe(true)
555-
expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex-mini")).toBe(true)
556-
})
557-
558-
it("should return true for o1 and o3 reasoning models", () => {
559-
expect(checkModelSupportsImages("o1", "o1-preview")).toBe(true)
560-
expect(checkModelSupportsImages("o1", "o1-mini")).toBe(true)
561-
expect(checkModelSupportsImages("o3", "o3")).toBe(true)
562-
})
563-
})
564-
565-
describe("Anthropic Claude models", () => {
566-
it("should return true for all claude-* models (GitHub Copilot)", () => {
567-
// All Claude models in GitHub Copilot support images
568-
expect(checkModelSupportsImages("claude-haiku", "claude-haiku-4.5")).toBe(true)
569-
expect(checkModelSupportsImages("claude-opus", "claude-opus-4.5")).toBe(true)
570-
expect(checkModelSupportsImages("claude-sonnet", "claude-sonnet-4")).toBe(true)
571-
expect(checkModelSupportsImages("claude-sonnet", "claude-sonnet-4.5")).toBe(true)
572-
})
573-
})
574-
575-
describe("Google Gemini models", () => {
576-
it("should return true for all gemini-* models (GitHub Copilot)", () => {
577-
// All Gemini models in GitHub Copilot support images
578-
expect(checkModelSupportsImages("gemini-pro", "gemini-2.5-pro")).toBe(true)
579-
expect(checkModelSupportsImages("gemini-flash", "gemini-3-flash-preview")).toBe(true)
580-
expect(checkModelSupportsImages("gemini-pro", "gemini-3-pro-preview")).toBe(true)
581-
})
582-
})
583-
584-
describe("non-vision models", () => {
585-
it("should return false for grok models (text-only in GitHub Copilot)", () => {
586-
// Grok is the only model family in GitHub Copilot that doesn't support images
587-
expect(checkModelSupportsImages("grok", "grok-code-fast-1")).toBe(false)
588-
})
589-
590-
it("should return false for models with non-matching prefixes", () => {
591-
// Models that don't start with gpt, claude, gemini, o1, or o3
592-
expect(checkModelSupportsImages("mistral", "mistral-large")).toBe(false)
593-
expect(checkModelSupportsImages("llama", "llama-3-70b")).toBe(false)
594-
expect(checkModelSupportsImages("unknown", "some-random-model")).toBe(false)
595-
})
596-
})
597-
598-
describe("case insensitivity", () => {
599-
it("should match regardless of case", () => {
600-
expect(checkModelSupportsImages("GPT", "GPT-4O")).toBe(true)
601-
expect(checkModelSupportsImages("CLAUDE", "CLAUDE-SONNET-4")).toBe(true)
602-
expect(checkModelSupportsImages("GEMINI", "GEMINI-2.5-PRO")).toBe(true)
603-
})
604-
})
605-
606-
describe("prefix matching", () => {
607-
it("should only match IDs that start with known prefixes", () => {
608-
// ID must START with the prefix, not just contain it
609-
expect(checkModelSupportsImages("custom", "gpt-4o")).toBe(true) // ID starts with gpt
610-
expect(checkModelSupportsImages("custom", "my-gpt-model")).toBe(false) // gpt not at start
611-
expect(checkModelSupportsImages("custom", "not-claude-model")).toBe(false) // claude not at start
612-
})
613-
})
614-
})
615-
616-
describe("IMAGE_CAPABLE_MODEL_PREFIXES", () => {
617-
it("should export the model prefixes array", () => {
618-
expect(Array.isArray(IMAGE_CAPABLE_MODEL_PREFIXES)).toBe(true)
619-
expect(IMAGE_CAPABLE_MODEL_PREFIXES.length).toBeGreaterThan(0)
620-
})
621-
622-
it("should include key model prefixes", () => {
623-
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("gpt")
624-
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("claude")
625-
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("gemini")
626-
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("o1")
627-
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("o3")
628-
})
629-
})

src/api/providers/vscode-lm.ts

Lines changed: 3 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -529,18 +529,14 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
529529

530530
const modelId = this.client.id || modelParts.join(SELECTOR_SEPARATOR)
531531

532-
// Check if the model supports images based on known model families
533-
// VS Code Language Model API 1.106+ supports image inputs via LanguageModelDataPart
534-
const supportsImages = checkModelSupportsImages(this.client.family, this.client.id)
535-
536532
// Build model info with conservative defaults for missing values
537533
const modelInfo: ModelInfo = {
538534
maxTokens: -1, // Unlimited tokens by default
539535
contextWindow:
540536
typeof this.client.maxInputTokens === "number"
541537
? Math.max(0, this.client.maxInputTokens)
542538
: openAiModelInfoSaneDefaults.contextWindow,
543-
supportsImages,
539+
supportsImages: false, // VSCode Language Model API currently doesn't support image inputs
544540
supportsPromptCache: true,
545541
inputPrice: 0,
546542
outputPrice: 0,
@@ -590,43 +586,8 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
590586
}
591587
}
592588

593-
/**
594-
* Model ID prefixes that support image inputs via VS Code Language Model API.
595-
* These models support the LanguageModelDataPart.image() API introduced in VS Code 1.106+.
596-
*
597-
* All GitHub Copilot models with these prefixes support images.
598-
* Only grok-* models don't support images (text only).
599-
*
600-
* Source: https://models.dev/api.json (github-copilot provider models)
601-
*/
602-
export const IMAGE_CAPABLE_MODEL_PREFIXES = [
603-
"gpt", // All GPT models (gpt-4o, gpt-4.1, gpt-5, gpt-5.1, gpt-5.2, gpt-5-mini, gpt-5.1-codex, etc.)
604-
"claude", // All Claude models (claude-haiku-4.5, claude-opus-4.5, claude-sonnet-4, claude-sonnet-4.5)
605-
"gemini", // All Gemini models (gemini-2.5-pro, gemini-3-flash-preview, gemini-3-pro-preview)
606-
"o1", // OpenAI o1 reasoning models
607-
"o3", // OpenAI o3 reasoning models
608-
]
609-
610-
/**
611-
* Checks if a model supports image inputs based on its model ID.
612-
* Uses prefix matching against known image-capable model families.
613-
*
614-
* @param _family The model family (unused, kept for API compatibility)
615-
* @param id The model ID
616-
* @returns true if the model supports image inputs
617-
*/
618-
export function checkModelSupportsImages(_family: string, id: string): boolean {
619-
const idLower = id.toLowerCase()
620-
return IMAGE_CAPABLE_MODEL_PREFIXES.some((prefix) => idLower.startsWith(prefix))
621-
}
622-
623-
// Static blacklist of VS Code Language Model IDs that should be excluded from the model list
624-
// e.g. because they don't support native tool calling or will never work
625-
const VSCODE_LM_STATIC_BLACKLIST: string[] = [
626-
"claude-3.7-sonnet",
627-
"claude-3.7-sonnet-thought",
628-
"claude-opus-41", // Does not support native tool calling
629-
]
589+
// Static blacklist of VS Code Language Model IDs that should be excluded from the model list, e.g. because they will never work
590+
const VSCODE_LM_STATIC_BLACKLIST: string[] = ["claude-3.7-sonnet", "claude-3.7-sonnet-thought"]
630591

631592
export async function getVsCodeLmModels() {
632593
try {

src/api/transform/__tests__/vscode-lm-format.spec.ts

Lines changed: 5 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,7 @@ interface MockLanguageModelToolCallPart {
2626
interface MockLanguageModelToolResultPart {
2727
type: "tool_result"
2828
callId: string
29-
content: (MockLanguageModelTextPart | MockLanguageModelDataPart)[]
30-
}
31-
32-
interface MockLanguageModelDataPart {
33-
type: "data"
34-
data: Uint8Array
35-
mimeType: string
29+
content: MockLanguageModelTextPart[]
3630
}
3731

3832
// Mock vscode namespace
@@ -60,32 +54,10 @@ vitest.mock("vscode", () => {
6054
type = "tool_result"
6155
constructor(
6256
public callId: string,
63-
public content: (MockLanguageModelTextPart | MockLanguageModelDataPart)[],
57+
public content: MockLanguageModelTextPart[],
6458
) {}
6559
}
6660

67-
class MockLanguageModelDataPart {
68-
type = "data"
69-
constructor(
70-
public data: Uint8Array,
71-
public mimeType: string,
72-
) {}
73-
74-
static image(data: Uint8Array, mime: string) {
75-
return new MockLanguageModelDataPart(data, mime)
76-
}
77-
78-
static json(value: any, mime?: string) {
79-
const bytes = new TextEncoder().encode(JSON.stringify(value))
80-
return new MockLanguageModelDataPart(bytes, mime || "application/json")
81-
}
82-
83-
static text(value: string, mime?: string) {
84-
const bytes = new TextEncoder().encode(value)
85-
return new MockLanguageModelDataPart(bytes, mime || "text/plain")
86-
}
87-
}
88-
8961
return {
9062
LanguageModelChatMessage: {
9163
Assistant: vitest.fn((content) => ({
@@ -103,7 +75,6 @@ vitest.mock("vscode", () => {
10375
LanguageModelTextPart: MockLanguageModelTextPart,
10476
LanguageModelToolCallPart: MockLanguageModelToolCallPart,
10577
LanguageModelToolResultPart: MockLanguageModelToolResultPart,
106-
LanguageModelDataPart: MockLanguageModelDataPart,
10778
}
10879
})
10980

@@ -179,7 +150,7 @@ describe("convertToVsCodeLmMessages", () => {
179150
expect(toolCall.type).toBe("tool_call")
180151
})
181152

182-
it("should convert image blocks to LanguageModelDataPart", () => {
153+
it("should handle image blocks with appropriate placeholders", () => {
183154
const messages: Anthropic.Messages.MessageParam[] = [
184155
{
185156
role: "user",
@@ -190,7 +161,7 @@ describe("convertToVsCodeLmMessages", () => {
190161
source: {
191162
type: "base64",
192163
media_type: "image/png",
193-
data: "dGVzdA==", // "test" in base64
164+
data: "base64data",
194165
},
195166
},
196167
],
@@ -200,123 +171,8 @@ describe("convertToVsCodeLmMessages", () => {
200171
const result = convertToVsCodeLmMessages(messages)
201172

202173
expect(result).toHaveLength(1)
203-
expect(result[0].content).toHaveLength(2)
204-
205-
// First part should be text
206-
const textPart = result[0].content[0] as MockLanguageModelTextPart
207-
expect(textPart.type).toBe("text")
208-
expect(textPart.value).toBe("Look at this:")
209-
210-
// Second part should be a LanguageModelDataPart for the image
211-
const imagePart = result[0].content[1] as unknown as MockLanguageModelDataPart
212-
expect(imagePart.type).toBe("data")
213-
expect(imagePart.mimeType).toBe("image/png")
214-
expect(imagePart.data).toBeInstanceOf(Uint8Array)
215-
})
216-
217-
it("should handle images in tool results", () => {
218-
const messages: Anthropic.Messages.MessageParam[] = [
219-
{
220-
role: "user",
221-
content: [
222-
{
223-
type: "tool_result",
224-
tool_use_id: "tool-1",
225-
content: [
226-
{ type: "text", text: "Screenshot result:" },
227-
{
228-
type: "image",
229-
source: {
230-
type: "base64",
231-
media_type: "image/jpeg",
232-
data: "dGVzdA==",
233-
},
234-
},
235-
],
236-
},
237-
],
238-
},
239-
]
240-
241-
const result = convertToVsCodeLmMessages(messages)
242-
243-
expect(result).toHaveLength(1)
244-
expect(result[0].content).toHaveLength(1)
245-
246-
const toolResult = result[0].content[0] as MockLanguageModelToolResultPart
247-
expect(toolResult.type).toBe("tool_result")
248-
expect(toolResult.content).toHaveLength(2)
249-
250-
// First item in tool result should be text
251-
const textPart = toolResult.content[0] as MockLanguageModelTextPart
252-
expect(textPart.type).toBe("text")
253-
254-
// Second item should be image data
255-
const imagePart = toolResult.content[1] as MockLanguageModelDataPart
256-
expect(imagePart.type).toBe("data")
257-
expect(imagePart.mimeType).toBe("image/jpeg")
258-
})
259-
260-
it("should return text placeholder for URL-based images", () => {
261-
const messages: Anthropic.Messages.MessageParam[] = [
262-
{
263-
role: "user",
264-
content: [
265-
{ type: "text", text: "Check this image:" },
266-
{
267-
type: "image",
268-
source: {
269-
type: "url",
270-
url: "https://example.com/image.png",
271-
} as any,
272-
},
273-
],
274-
},
275-
]
276-
277-
const result = convertToVsCodeLmMessages(messages)
278-
279-
expect(result).toHaveLength(1)
280-
expect(result[0].content).toHaveLength(2)
281-
282-
// First part should be text
283-
const textPart = result[0].content[0] as MockLanguageModelTextPart
284-
expect(textPart.type).toBe("text")
285-
expect(textPart.value).toBe("Check this image:")
286-
287-
// Second part should be a text placeholder (not an empty DataPart)
288174
const imagePlaceholder = result[0].content[1] as MockLanguageModelTextPart
289-
expect(imagePlaceholder.type).toBe("text")
290-
expect(imagePlaceholder.value).toContain("URL not supported")
291-
expect(imagePlaceholder.value).toContain("https://example.com/image.png")
292-
})
293-
294-
it("should return text placeholder for unknown image source types", () => {
295-
const messages: Anthropic.Messages.MessageParam[] = [
296-
{
297-
role: "user",
298-
content: [
299-
{
300-
type: "image",
301-
source: {
302-
type: "unknown",
303-
media_type: "image/png",
304-
data: "", // Required by type but ignored for unknown source types
305-
} as any,
306-
},
307-
],
308-
},
309-
]
310-
311-
const result = convertToVsCodeLmMessages(messages)
312-
313-
expect(result).toHaveLength(1)
314-
expect(result[0].content).toHaveLength(1)
315-
316-
// Should return a text placeholder for unknown source types
317-
const placeholder = result[0].content[0] as MockLanguageModelTextPart
318-
expect(placeholder.type).toBe("text")
319-
expect(placeholder.value).toContain("unsupported source type")
175+
expect(imagePlaceholder.value).toContain("[Image (base64): image/png not supported by VSCode LM API]")
320176
})
321177
})
322178

0 commit comments

Comments
 (0)