|
| 1 | +/*--------------------------------------------------------------------------------------------- |
| 2 | + * Copyright (c) Microsoft Corporation. All rights reserved. |
| 3 | + * Licensed under the MIT License. See License.txt in the project root for license information. |
| 4 | + *--------------------------------------------------------------------------------------------*/ |
| 5 | +import * as l10n from '@vscode/l10n'; |
| 6 | +import { Raw } from '@vscode/prompt-tsx'; |
| 7 | + |
/**
 * Model-facing placeholder substituted for dropped history images.
 * Intentionally not localized — this text is sent to the model, not the user.
 */
const IMAGE_PLACEHOLDER_TEXT = '[Image omitted from conversation history due to model limit.]';

/** Counts the image content parts of a single message (0 when its content is not an array). */
function countImageParts(message: Raw.ChatMessage): number {
	if (!Array.isArray(message.content)) {
		return 0;
	}
	let count = 0;
	for (const part of message.content) {
		if (part.type === Raw.ChatCompletionContentPartKind.Image) {
			count++;
		}
	}
	return count;
}

/**
 * Silently drops the oldest images from history when the total number of images
 * in the conversation exceeds `maxImages`. Images belonging to the current turn
 * (the last user message and anything after it, e.g. recent tool results) are
 * always preserved. Dropped images are replaced in place by a text part carrying
 * {@link IMAGE_PLACEHOLDER_TEXT}, so the position of every other part is unchanged.
 *
 * If the conversation contains no user message, the last message is treated as
 * the current turn.
 *
 * @param messages The conversation, oldest message first. Never mutated.
 * @param maxImages Maximum number of image parts allowed in the request.
 * @returns `messages` itself (same reference) when the total image count is
 * within the limit. Otherwise a new array in which each message that lost an
 * image is a shallow copy with a new `content` array; all other messages are
 * returned by reference.
 * @throws Error with a localized message when the current turn alone contains
 * more than `maxImages` images — dropping history cannot help in that case, and
 * sending the request would only produce an opaque server error.
 */
export function filterHistoryImages(messages: Raw.ChatMessage[], maxImages: number): Raw.ChatMessage[] {
	// Anchor the current turn at the last user message; anything at or after this
	// index is treated as "current turn" and its images are never filtered.
	let currentTurnStart = -1;
	for (let i = messages.length - 1; i >= 0; i--) {
		if (messages[i].role === Raw.ChatRole.User) {
			currentTurnStart = i;
			break;
		}
	}

	// Corner case: no user message at all (e.g. system-only history). Treat the
	// last message as the current turn so we still filter earlier images.
	if (currentTurnStart === -1 && messages.length > 0) {
		currentTurnStart = messages.length - 1;
	}

	// Single pass: tally history images and current-turn images separately.
	let historyImages = 0;
	let currentTurnImages = 0;
	for (let i = 0; i < messages.length; i++) {
		const count = countImageParts(messages[i]);
		if (i >= currentTurnStart) {
			currentTurnImages += count;
		} else {
			historyImages += count;
		}
	}

	// No filtering needed if the total is within the limit.
	if (historyImages + currentTurnImages <= maxImages) {
		return messages;
	}

	// Fail fast with a clear, localized error when the current turn alone exceeds
	// the limit. Silent history filtering is only safe when dropping history
	// images can bring the total down to the limit.
	if (currentTurnImages > maxImages) {
		throw new Error(l10n.t('Too many images in request: {0} images provided, but the model supports a maximum of {1} images.', currentTurnImages, maxImages));
	}

	// Walk backward through history, spending the remaining budget on the most
	// recent images. Because the newest images are always the ones kept, every
	// keep/drop decision is captured by one cutoff position: the newest history
	// image that no longer fits. That image and every older image are dropped.
	let historyBudget = maxImages - currentTurnImages;
	let cutoffMsgIdx = -1;
	let cutoffPartIdx = -1;
	for (let i = currentTurnStart - 1; i >= 0 && cutoffMsgIdx === -1; i--) {
		const content = messages[i].content;
		if (!Array.isArray(content)) {
			continue;
		}
		for (let j = content.length - 1; j >= 0; j--) {
			if (content[j].type !== Raw.ChatCompletionContentPartKind.Image) {
				continue;
			}
			if (historyBudget > 0) {
				historyBudget--;
				continue;
			}
			cutoffMsgIdx = i;
			cutoffPartIdx = j;
			break;
		}
	}

	// Build the filtered messages, replacing dropped images with text placeholders.
	// Messages newer than the cutoff (including the whole current turn) are untouched.
	return messages.map((message, msgIdx) => {
		if (msgIdx > cutoffMsgIdx || !Array.isArray(message.content)) {
			return message;
		}
		// In the cutoff message only parts up to the cutoff are dropped; in older
		// messages every image is dropped.
		const lastDroppedPartIdx = msgIdx === cutoffMsgIdx ? cutoffPartIdx : message.content.length - 1;
		const losesImage = message.content.some((part, partIdx) =>
			partIdx <= lastDroppedPartIdx && part.type === Raw.ChatCompletionContentPartKind.Image);
		if (!losesImage) {
			return message;
		}
		return {
			...message,
			content: message.content.map((part, partIdx) => {
				if (partIdx > lastDroppedPartIdx || part.type !== Raw.ChatCompletionContentPartKind.Image) {
					return part;
				}
				return { type: Raw.ChatCompletionContentPartKind.Text, text: IMAGE_PLACEHOLDER_TEXT };
			})
		};
	});
}
0 commit comments