microsoft
diff --git a/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts b/extensions/copilot/src/platform/endpoint/node/chatEndpoint.ts
Lines changed: 26 additions & 24 deletions
diff --git a/extensions/copilot/src/platform/endpoint/node/imageLimits.ts b/extensions/copilot/src/platform/endpoint/node/imageLimits.ts
Lines changed: 128 additions & 0 deletions
@@ -3,7 +3,6 @@
  *  Licensed under the MIT License. See License.txt in the project root for license information.
  *--------------------------------------------------------------------------------------------*/
 import { RequestMetadata, RequestType } from '@vscode/copilot-api';
-import * as l10n from '@vscode/l10n';
 import { OpenAI, Raw } from '@vscode/prompt-tsx';
 import type { CancellationToken } from 'vscode';
 import { ITokenizer, TokenizerType } from '../../../util/common/tokenizer';
@@ -30,11 +29,12 @@ import { ITelemetryService, TelemetryProperties } from '../../telemetry/common/t
 import { TelemetryData } from '../../telemetry/common/telemetryData';
 import { ITokenizerProvider } from '../../tokenizer/node/tokenizer';
 import { ICAPIClientService } from '../common/capiClient';
-import { isGeminiFamily, modelSupportsContextEditing, modelSupportsToolSearch } from '../common/chatModelCapabilities';
+import { isAnthropicFamily, isGeminiFamily, modelSupportsContextEditing, modelSupportsToolSearch } from '../common/chatModelCapabilities';
 import { IDomainService } from '../common/domainService';
 import { CustomModel, IChatModelInformation, ModelSupportedEndpoint } from '../common/endpointProvider';
 import { createMessagesRequestBody, processResponseFromMessagesEndpoint } from './messagesApi';
 import { createResponsesRequestBody, getResponsesApiCompactionThreshold, processResponseFromChatEndpoint } from './responsesApi';
+import { filterHistoryImages } from './imageLimits';
 
 /**
  * The default processor for the stream format from CAPI
@@ -288,13 +288,10 @@ export class ChatEndpoint implements IChatEndpoint {
 	}
 
 	createRequestBody(options: ICreateEndpointBodyOptions): IEndpointBody {
-		// Validate image count if endpoint has max_prompt_images limit (Gemini only for now)
-		if (isGeminiFamily(this) && this.maxPromptImages !== undefined) {
-			const imageCount = this.countImages(options.messages, this.maxPromptImages);
-			if (imageCount > this.maxPromptImages) {
-				const errorMsg = l10n.t('Too many images in request: {0} images provided, but the model supports a maximum of {1} images.', imageCount, this.maxPromptImages);
-				throw new Error(errorMsg);
-			}
+		// Determine per-model image limit for APIs with known restrictions
+		const imageLimit = this.getImageLimit();
+		if (imageLimit !== undefined) {
+			options = { ...options, messages: this.validateAndFilterImages(options.messages, imageLimit) };
 		}
 
 		if (this.useResponsesApi) {
@@ -309,22 +306,27 @@ export class ChatEndpoint implements IChatEndpoint {
 		}
 	}
 
-	private countImages(messages: Raw.ChatMessage[], maxAllowed?: number): number {
-		let imageCount = 0;
-		for (const message of messages) {
-			if (Array.isArray(message.content)) {
-				for (const part of message.content) {
-					if (part.type === Raw.ChatCompletionContentPartKind.Image) {
-						imageCount++;
-						// Early exit if we've already exceeded the limit
-						if (maxAllowed !== undefined && imageCount > maxAllowed) {
-							return imageCount;
-						}
-					}
-				}
-			}
+	/**
+	 * Returns the model-specific image limit, or `undefined` if no limit applies.
+	 *
+	 * - Anthropic-family endpoints return 20, but only when `useMessagesApi` is set.
+	 * - Gemini-family endpoints return 10.
+	 * - Any endpoint matching neither check returns `undefined`.
+	 *
+	 * Anthropic Messages API allows up to 20 images per request; Gemini allows up to 10.
+	 * These are hardcoded based on API documentation rather than model metadata to
+	 * avoid being clamped by unreliable server-provided values.
+	 *
+	 * @returns The maximum number of images to allow in one request, or
+	 * `undefined` when this endpoint has no hardcoded limit.
+	 */
+	private getImageLimit(): number | undefined {
+		if (this.useMessagesApi && isAnthropicFamily(this)) {
+			return 20;
+		}
+		if (isGeminiFamily(this)) {
+			return 10;
 		}
-		return imageCount;
+		return undefined;
+	}
+
+	/**
+	 * Thin wrapper around {@link filterHistoryImages} retained for test ergonomics.
+	 * Adds no logic of its own; both arguments are forwarded unchanged.
+	 *
+	 * @param messages The request messages to check for images.
+	 * @param maxImages The image limit forwarded to {@link filterHistoryImages}.
+	 * @returns Whatever {@link filterHistoryImages} returns for the same arguments.
+	 */
+	private validateAndFilterImages(messages: Raw.ChatMessage[], maxImages: number): Raw.ChatMessage[] {
+		return filterHistoryImages(messages, maxImages);
 	}
 
 	protected getCompletionsCallback(): RawMessageConversionCallback | undefined {
 
@@ -0,0 +1,128 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+import * as l10n from '@vscode/l10n';
+import { Raw } from '@vscode/prompt-tsx';
+
+/**
+ * Model-facing placeholder substituted for dropped history images.
+ * Each dropped image part is replaced by a text part carrying this string, so
+ * the surrounding message content keeps the same number of parts.
+ * Intentionally not localized — this text is sent to the model, not the user.
+ */
+const IMAGE_PLACEHOLDER_TEXT = '[Image omitted from conversation history due to model limit.]';
+
+/**
+ * Silently drops the oldest images from history when the total number of images
+ * in the conversation exceeds `maxImages`. Dropped images are replaced in place
+ * by a text placeholder part. Images belonging to the current turn
+ * (the last user message and anything after it, e.g. recent tool results) are
+ * always preserved. When there is no user message at all, the last message is
+ * treated as the current turn.
+ *
+ * If the current turn alone exceeds the limit, throws a localized error rather
+ * than sending a request we know will be rejected with an opaque server error.
+ *
+ * @param messages The full message list for the request, oldest first.
+ * @param maxImages Maximum number of image parts allowed across all messages.
+ * @returns The input array itself when the total image count is already within
+ * `maxImages`. Otherwise a new array: current-turn messages and messages without
+ * images are reused by reference, while history messages that contain images are
+ * shallow-copied with a new `content` array. The original array and its messages
+ * are never mutated.
+ * @throws Error with a localized message when the current turn contains more
+ * than `maxImages` images.
+ */
+export function filterHistoryImages(messages: Raw.ChatMessage[], maxImages: number): Raw.ChatMessage[] {
+	// Anchor the current turn at the last user message; every message at or after this
+	// index is treated as "current turn" and its images are never replaced.
+	let lastUserIdx = -1;
+	for (let i = messages.length - 1; i >= 0; i--) {
+		if (messages[i].role === Raw.ChatRole.User) {
+			lastUserIdx = i;
+			break;
+		}
+	}
+
+	// Corner case: no user message at all (e.g. system-only history). Treat the
+	// last message as the current turn so we still filter earlier images.
+	if (lastUserIdx === -1 && messages.length > 0) {
+		lastUserIdx = messages.length - 1;
+	}
+
+	// Count images in the current turn (the last user message and anything after it).
+	let currentTurnImages = 0;
+	for (let i = Math.max(lastUserIdx, 0); i < messages.length; i++) {
+		const content = messages[i].content;
+		if (!Array.isArray(content)) {
+			continue;
+		}
+		for (const part of content) {
+			if (part.type === Raw.ChatCompletionContentPartKind.Image) {
+				currentTurnImages++;
+			}
+		}
+	}
+
+	// Count total images across all messages (history and current turn together).
+	let totalImages = 0;
+	for (const message of messages) {
+		if (Array.isArray(message.content)) {
+			for (const part of message.content) {
+				if (part.type === Raw.ChatCompletionContentPartKind.Image) {
+					totalImages++;
+				}
+			}
+		}
+	}
+
+	// No filtering needed if total is within the limit; the input array is returned as-is.
+	if (totalImages <= maxImages) {
+		return messages;
+	}
+
+	// Fail fast with a clear, localized error when the current turn alone exceeds
+	// the limit — otherwise we'd send a request the server will reject with an
+	// opaque error. Silent history filtering is only safe when dropping history
+	// images can bring the total down to the limit.
+	if (currentTurnImages > maxImages) {
+		throw new Error(l10n.t('Too many images in request: {0} images provided, but the model supports a maximum of {1} images.', currentTurnImages, maxImages));
+	}
+
+	// The current turn's images are reserved first; whatever remains of the limit
+	// is the number of history images that may be kept.
+	let historyBudget = maxImages - currentTurnImages;
+
+	// Walk history newest-first (messages, then parts within a message, in reverse) so the most recent images claim the budget.
+	const historyImageDecisions = new Map<string, boolean>(); // "msgIdx:partIdx" -> true = keep, false = replace with placeholder
+	for (let i = lastUserIdx - 1; i >= 0; i--) {
+		if (!Array.isArray(messages[i].content)) {
+			continue;
+		}
+		for (let j = messages[i].content.length - 1; j >= 0; j--) {
+			if (messages[i].content[j].type === Raw.ChatCompletionContentPartKind.Image) {
+				const key = `${i}:${j}`;
+				if (historyBudget > 0) {
+					historyImageDecisions.set(key, true);
+					historyBudget--;
+				} else {
+					historyImageDecisions.set(key, false);
+				}
+			}
+		}
+	}
+
+	// Build filtered messages, replacing dropped images with text placeholders; current-turn and image-free messages are reused by reference.
+	return messages.map((message, msgIdx) => {
+		if (msgIdx >= lastUserIdx) {
+			return message;
+		}
+		if (!Array.isArray(message.content)) {
+			return message;
+		}
+		if (!message.content.some(p => p.type === Raw.ChatCompletionContentPartKind.Image)) {
+			return message;
+		}
+		return {
+			...message,
+			content: message.content.map((part, partIdx) => {
+				if (part.type !== Raw.ChatCompletionContentPartKind.Image) {
+					return part;
+				}
+				if (historyImageDecisions.get(`${msgIdx}:${partIdx}`)) {
+					return part;
+				}
+				return { type: Raw.ChatCompletionContentPartKind.Text, text: IMAGE_PLACEHOLDER_TEXT };
+			})
+		};
+	});
+}