Skip to content

Commit c47cdd7

Browse files
authored
better handling of too many images in chat (#310437)
* better handling of too many images in chat * address review: handle no-user-message fallback, assert filter output, cover tool-role images * factor filterHistoryImages into a pure helper with direct tests * restore validateAndFilterImages delegate so existing tests are unchanged * address comments, throw early
1 parent 602484a commit c47cdd7

File tree

4 files changed

+466
-54
lines changed

4 files changed

+466
-54
lines changed

extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55
import { RequestMetadata, RequestType } from '@vscode/copilot-api';
6-
import * as l10n from '@vscode/l10n';
76
import { OpenAI, Raw } from '@vscode/prompt-tsx';
87
import type { CancellationToken } from 'vscode';
98
import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';
@@ -30,11 +29,12 @@ import { ITelemetryService, TelemetryProperties } from '../../telemetry/common/t
3029
import { TelemetryData } from '../../telemetry/common/telemetryData';
3130
import { ITokenizerProvider } from '../../tokenizer/node/tokenizer';
3231
import { ICAPIClientService } from '../common/capiClient';
33-
import { isGeminiFamily, modelSupportsContextEditing, modelSupportsToolSearch } from '../common/chatModelCapabilities';
32+
import { isAnthropicFamily, isGeminiFamily, modelSupportsContextEditing, modelSupportsToolSearch } from '../common/chatModelCapabilities';
3433
import { IDomainService } from '../common/domainService';
3534
import { CustomModel, IChatModelInformation, ModelSupportedEndpoint } from '../common/endpointProvider';
3635
import { createMessagesRequestBody, processResponseFromMessagesEndpoint } from './messagesApi';
3736
import { createResponsesRequestBody, getResponsesApiCompactionThreshold, processResponseFromChatEndpoint } from './responsesApi';
37+
import { filterHistoryImages } from './imageLimits';
3838

3939
/**
4040
* The default processor for the stream format from CAPI
@@ -288,13 +288,10 @@ export class ChatEndpoint implements IChatEndpoint {
288288
}
289289

290290
createRequestBody(options: ICreateEndpointBodyOptions): IEndpointBody {
291-
// Validate image count if endpoint has max_prompt_images limit (Gemini only for now)
292-
if (isGeminiFamily(this) && this.maxPromptImages !== undefined) {
293-
const imageCount = this.countImages(options.messages, this.maxPromptImages);
294-
if (imageCount > this.maxPromptImages) {
295-
const errorMsg = l10n.t('Too many images in request: {0} images provided, but the model supports a maximum of {1} images.', imageCount, this.maxPromptImages);
296-
throw new Error(errorMsg);
297-
}
291+
// Determine per-model image limit for APIs with known restrictions
292+
const imageLimit = this.getImageLimit();
293+
if (imageLimit !== undefined) {
294+
options = { ...options, messages: this.validateAndFilterImages(options.messages, imageLimit) };
298295
}
299296

300297
if (this.useResponsesApi) {
@@ -309,22 +306,27 @@ export class ChatEndpoint implements IChatEndpoint {
309306
}
310307
}
311308

312-
private countImages(messages: Raw.ChatMessage[], maxAllowed?: number): number {
313-
let imageCount = 0;
314-
for (const message of messages) {
315-
if (Array.isArray(message.content)) {
316-
for (const part of message.content) {
317-
if (part.type === Raw.ChatCompletionContentPartKind.Image) {
318-
imageCount++;
319-
// Early exit if we've already exceeded the limit
320-
if (maxAllowed !== undefined && imageCount > maxAllowed) {
321-
return imageCount;
322-
}
323-
}
324-
}
325-
}
309+
/**
 * Resolves the maximum number of images this endpoint may send in one request.
 *
 * The caps are fixed constants (20 for Anthropic-family models reached through
 * the Messages API, 10 for Gemini-family models) taken from API documentation;
 * server-provided model metadata is deliberately not consulted because it has
 * proven unreliable.
 *
 * @returns The image cap for this endpoint, or `undefined` when no cap applies.
 */
private getImageLimit(): number | undefined {
	const isAnthropicViaMessagesApi = this.useMessagesApi && isAnthropicFamily(this);
	if (isAnthropicViaMessagesApi) {
		return 20;
	}
	return isGeminiFamily(this) ? 10 : undefined;
}
324+
325+
/**
 * Delegates to {@link filterHistoryImages}; kept as a method so existing tests
 * that call it on the endpoint continue to work.
 *
 * @param messages Conversation messages to check against the image cap.
 * @param maxImages Maximum number of images allowed in the request.
 * @returns Whatever {@link filterHistoryImages} returns for the same arguments.
 */
private validateAndFilterImages(messages: Raw.ChatMessage[], maxImages: number): Raw.ChatMessage[] {
	const filteredMessages = filterHistoryImages(messages, maxImages);
	return filteredMessages;
}
329331

330332
protected getCompletionsCallback(): RawMessageConversionCallback | undefined {
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
import * as l10n from '@vscode/l10n';
6+
import { Raw } from '@vscode/prompt-tsx';
7+
8+
/**
9+
* Model-facing placeholder substituted for dropped history images.
10+
* Intentionally not localized — this text is sent to the model, not the user.
11+
*/
12+
const IMAGE_PLACEHOLDER_TEXT = '[Image omitted from conversation history due to model limit.]';
13+
14+
/**
15+
* Silently drops the oldest images from history when the total number of images
16+
* in the conversation exceeds `maxImages`. Images belonging to the current turn
17+
* (the last user message and anything after it, e.g. recent tool results) are
18+
* always preserved.
19+
*
20+
* If the current turn alone exceeds the limit, throws a localized error rather
21+
* than sending a request we know will be rejected with an opaque server error.
22+
*
23+
* @returns A (possibly filtered) copy of messages. The original array is never mutated.
24+
*/
25+
export function filterHistoryImages(messages: Raw.ChatMessage[], maxImages: number): Raw.ChatMessage[] {
26+
// Anchor the current turn at the last user message; anything at or after this
27+
// index is treated as "current turn" and its images are never filtered.
28+
let lastUserIdx = -1;
29+
for (let i = messages.length - 1; i >= 0; i--) {
30+
if (messages[i].role === Raw.ChatRole.User) {
31+
lastUserIdx = i;
32+
break;
33+
}
34+
}
35+
36+
// Corner case: no user message at all (e.g. system-only history). Treat the
37+
// last message as the current turn so we still filter earlier images.
38+
if (lastUserIdx === -1 && messages.length > 0) {
39+
lastUserIdx = messages.length - 1;
40+
}
41+
42+
// Count images in the current turn (the last user message and anything after it).
43+
let currentTurnImages = 0;
44+
for (let i = Math.max(lastUserIdx, 0); i < messages.length; i++) {
45+
const content = messages[i].content;
46+
if (!Array.isArray(content)) {
47+
continue;
48+
}
49+
for (const part of content) {
50+
if (part.type === Raw.ChatCompletionContentPartKind.Image) {
51+
currentTurnImages++;
52+
}
53+
}
54+
}
55+
56+
// Count total images across all messages
57+
let totalImages = 0;
58+
for (const message of messages) {
59+
if (Array.isArray(message.content)) {
60+
for (const part of message.content) {
61+
if (part.type === Raw.ChatCompletionContentPartKind.Image) {
62+
totalImages++;
63+
}
64+
}
65+
}
66+
}
67+
68+
// No filtering needed if total is within the limit
69+
if (totalImages <= maxImages) {
70+
return messages;
71+
}
72+
73+
// Fail fast with a clear, localized error when the current turn alone exceeds
74+
// the limit — otherwise we'd send a request the server will reject with an
75+
// opaque error. Silent history filtering is only safe when dropping history
76+
// images can bring the total down to the limit.
77+
if (currentTurnImages > maxImages) {
78+
throw new Error(l10n.t('Too many images in request: {0} images provided, but the model supports a maximum of {1} images.', currentTurnImages, maxImages));
79+
}
80+
81+
// Walk backward through history (before the current turn), keeping the
82+
// most recent images and replacing the oldest with placeholders.
83+
let historyBudget = maxImages - currentTurnImages;
84+
85+
// Collect keep/drop decisions by walking backward through history
86+
const historyImageDecisions = new Map<string, boolean>(); // "msgIdx:partIdx" -> keep
87+
for (let i = lastUserIdx - 1; i >= 0; i--) {
88+
if (!Array.isArray(messages[i].content)) {
89+
continue;
90+
}
91+
for (let j = messages[i].content.length - 1; j >= 0; j--) {
92+
if (messages[i].content[j].type === Raw.ChatCompletionContentPartKind.Image) {
93+
const key = `${i}:${j}`;
94+
if (historyBudget > 0) {
95+
historyImageDecisions.set(key, true);
96+
historyBudget--;
97+
} else {
98+
historyImageDecisions.set(key, false);
99+
}
100+
}
101+
}
102+
}
103+
104+
// Build filtered messages, replacing dropped images with text placeholders
105+
return messages.map((message, msgIdx) => {
106+
if (msgIdx >= lastUserIdx) {
107+
return message;
108+
}
109+
if (!Array.isArray(message.content)) {
110+
return message;
111+
}
112+
if (!message.content.some(p => p.type === Raw.ChatCompletionContentPartKind.Image)) {
113+
return message;
114+
}
115+
return {
116+
...message,
117+
content: message.content.map((part, partIdx) => {
118+
if (part.type !== Raw.ChatCompletionContentPartKind.Image) {
119+
return part;
120+
}
121+
if (historyImageDecisions.get(`${msgIdx}:${partIdx}`)) {
122+
return part;
123+
}
124+
return { type: Raw.ChatCompletionContentPartKind.Text, text: IMAGE_PLACEHOLDER_TEXT };
125+
})
126+
};
127+
});
128+
}

0 commit comments

Comments
 (0)