Skip to content

Commit 4be5209

Browse files
authored
Enhance PromptFileParser to ignore links and variables inside inline code and fenced code blocks (#298344)
1 parent 6eee773 commit 4be5209

File tree

2 files changed

+232 additions, -0 deletions (lines changed)

2 files changed

+232 additions, -0 deletions (lines changed)

src/vs/workbench/contrib/chat/common/promptSyntax/promptFileParser.ts

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -416,14 +416,66 @@ export class PromptBody {
416416
const fileReferences: IBodyFileReference[] = [];
417417
const variableReferences: IBodyVariableReference[] = [];
418418
const bodyOffset = Iterable.reduce(Iterable.slice(this.linesWithEOL, 0, this.range.startLineNumber - 1), (len, line) => line.length + len, 0);
419+
let inFencedCodeBlock = false;
420+
let fencedCodeBlockFenceChar: string | undefined;
421+
let fencedCodeBlockFenceLength = 0;
419422
for (let i = this.range.startLineNumber - 1, lineStartOffset = bodyOffset; i < this.range.endLineNumber - 1; i++) {
420423
const line = this.linesWithEOL[i];
424+
const trimmedLine = line.trimStart();
425+
426+
// Detect fenced code block lines (``` or ~~~, 3 or more chars)
427+
const fenceMatch = /^(?<fence>(`{3,}|~{3,}))/u.exec(trimmedLine);
428+
if (fenceMatch) {
429+
const fence = fenceMatch.groups!.fence;
430+
const fenceChar = fence[0];
431+
const fenceLength = fence.length;
432+
const restOfLine = trimmedLine.slice(fence.length);
433+
434+
if (!inFencedCodeBlock) {
435+
// Opening fence: record fence char/length and enter fenced code block
436+
inFencedCodeBlock = true;
437+
fencedCodeBlockFenceChar = fenceChar;
438+
fencedCodeBlockFenceLength = fenceLength;
439+
lineStartOffset += line.length;
440+
continue;
441+
}
442+
443+
// Potential closing fence: must match fence char and have at least the same length,
444+
// and only whitespace is allowed after the fence.
445+
if (fencedCodeBlockFenceChar === fenceChar && fenceLength >= fencedCodeBlockFenceLength && /^\s*$/.test(restOfLine)) {
446+
inFencedCodeBlock = false;
447+
fencedCodeBlockFenceChar = undefined;
448+
fencedCodeBlockFenceLength = 0;
449+
lineStartOffset += line.length;
450+
continue;
451+
}
452+
}
453+
454+
// Skip all lines inside fenced code blocks
455+
if (inFencedCodeBlock) {
456+
lineStartOffset += line.length;
457+
continue;
458+
}
459+
460+
// Collect inline code spans (backtick-delimited) to exclude from matching
461+
const inlineCodeRanges: { start: number; end: number }[] = [];
462+
for (const inlineMatch of line.matchAll(/`[^`]+`/g)) {
463+
inlineCodeRanges.push({ start: inlineMatch.index, end: inlineMatch.index + inlineMatch[0].length });
464+
}
465+
466+
const isInsideInlineCode = (offset: number) => {
467+
return inlineCodeRanges.some(r => offset >= r.start && offset < r.end);
468+
};
469+
421470
// Match markdown links: [text](link)
422471
const linkMatch = line.matchAll(/\[(.*?)\]\((.+?)\)/g);
423472
for (const match of linkMatch) {
424473
if (match.index > 0 && line[match.index - 1] === '!') {
425474
continue; // skip image links
426475
}
476+
if (isInsideInlineCode(match.index)) {
477+
continue; // skip matches inside inline code
478+
}
427479
const linkEndOffset = match.index + match[0].length - 1; // before the parenthesis
428480
const linkStartOffset = match.index + match[0].length - match[2].length - 1;
429481
const range = new Range(i + 1, linkStartOffset + 1, i + 1, linkEndOffset + 1);
@@ -440,6 +492,9 @@ export class PromptBody {
440492
if (markdownLinkRanges.some(mdRange => Range.areIntersectingOrTouching(mdRange, fullRange))) {
441493
continue;
442494
}
495+
if (isInsideInlineCode(match.index)) {
496+
continue; // skip matches inside inline code
497+
}
443498
const contentMatch = match.groups?.['filePath'] || match.groups?.['toolName'];
444499
if (!contentMatch) {
445500
continue;

src/vs/workbench/contrib/chat/test/common/promptSyntax/service/promptFileParser.test.ts

Lines changed: 177 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,183 @@ suite('PromptFileParser', () => {
239239
assert.deepEqual(result.header.tools, ['search', 'terminal']);
240240
});
241241

242+
test('ignores links and variables inside inline code and fenced code blocks', async () => {
243+
const uri = URI.parse('file:///test/prompt3.md');
244+
const content = [
245+
'---',
246+
`description: "Prompt with markdown code"`,
247+
'---',
248+
'Outside #tool:outside and [outside](./outside.md).',
249+
'Inline code: `#tool:inline and [inline](./inline.md)` should be ignored.',
250+
'```ts',
251+
'#tool:block and #file:./inside-block.md and [block](./block.md)',
252+
'```',
253+
'After block #file:./after.md and [after](./after-link.md).',
254+
].join('\n');
255+
256+
const result = new PromptFileParser().parse(uri, content);
257+
assert.ok(result.body);
258+
assert.deepEqual(result.body.fileReferences.map(reference => ({ content: reference.content, isMarkdownLink: reference.isMarkdownLink })), [
259+
{ content: './outside.md', isMarkdownLink: true },
260+
{ content: './after.md', isMarkdownLink: false },
261+
{ content: './after-link.md', isMarkdownLink: true }
262+
]);
263+
assert.deepEqual(result.body.variableReferences.map(reference => reference.name), ['outside']);
264+
});
265+
266+
test('ignores references in multiple inline code spans on the same line', async () => {
267+
const uri = URI.parse('file:///test/prompt-inline.md');
268+
const content = [
269+
'---',
270+
'description: "test"',
271+
'---',
272+
'Before `#tool:ignored1` middle #tool:visible `[link](./ignored.md)` after [real](./real.md).',
273+
].join('\n');
274+
275+
const result = new PromptFileParser().parse(uri, content);
276+
assert.ok(result.body);
277+
assert.deepEqual(result.body.fileReferences.map(r => ({ content: r.content, isMarkdownLink: r.isMarkdownLink })), [
278+
{ content: './real.md', isMarkdownLink: true },
279+
]);
280+
assert.deepEqual(result.body.variableReferences.map(r => r.name), ['visible']);
281+
});
282+
283+
test('handles fenced code block without language specifier', async () => {
284+
const uri = URI.parse('file:///test/prompt-fence.md');
285+
const content = [
286+
'---',
287+
'description: "test"',
288+
'---',
289+
'```',
290+
'#file:./ignored.md',
291+
'[link](./ignored-link.md)',
292+
'```',
293+
'#file:./visible.md',
294+
].join('\n');
295+
296+
const result = new PromptFileParser().parse(uri, content);
297+
assert.ok(result.body);
298+
assert.deepEqual(result.body.fileReferences.map(r => ({ content: r.content, isMarkdownLink: r.isMarkdownLink })), [
299+
{ content: './visible.md', isMarkdownLink: false },
300+
]);
301+
assert.deepEqual(result.body.variableReferences, []);
302+
});
303+
304+
test('handles multiple fenced code blocks', async () => {
305+
const uri = URI.parse('file:///test/prompt-multi-fence.md');
306+
const content = [
307+
'---',
308+
'description: "test"',
309+
'---',
310+
'#tool:before',
311+
'```js',
312+
'#tool:ignored1',
313+
'```',
314+
'#tool:between',
315+
'```python',
316+
'#tool:ignored2',
317+
'```',
318+
'#tool:after',
319+
].join('\n');
320+
321+
const result = new PromptFileParser().parse(uri, content);
322+
assert.ok(result.body);
323+
assert.deepEqual(result.body.variableReferences.map(r => r.name), ['before', 'between', 'after']);
324+
});
325+
326+
test('unclosed fenced code block ignores all remaining lines', async () => {
327+
const uri = URI.parse('file:///test/prompt-unclosed.md');
328+
const content = [
329+
'---',
330+
'description: "test"',
331+
'---',
332+
'#tool:visible',
333+
'```',
334+
'#tool:ignored',
335+
'#file:./ignored.md',
336+
].join('\n');
337+
338+
const result = new PromptFileParser().parse(uri, content);
339+
assert.ok(result.body);
340+
assert.deepEqual(result.body.variableReferences.map(r => r.name), ['visible']);
341+
assert.deepEqual(result.body.fileReferences, []);
342+
});
343+
344+
test('adjacent inline code does not suppress outside references', async () => {
345+
const uri = URI.parse('file:///test/prompt-adjacent.md');
346+
const content = [
347+
'---',
348+
'description: "test"',
349+
'---',
350+
'`code`#tool:attached `more`[link](./file.md)',
351+
].join('\n');
352+
353+
const result = new PromptFileParser().parse(uri, content);
354+
assert.ok(result.body);
355+
// #tool:attached starts right after the closing backtick, so it's outside inline code
356+
assert.deepEqual(result.body.variableReferences.map(r => r.name), ['attached']);
357+
// [link](./file.md) starts after the second inline code span
358+
assert.deepEqual(result.body.fileReferences.map(r => ({ content: r.content, isMarkdownLink: r.isMarkdownLink })), [
359+
{ content: './file.md', isMarkdownLink: true },
360+
]);
361+
});
362+
363+
test('indented fenced code block is still detected', async () => {
364+
const uri = URI.parse('file:///test/prompt-indent.md');
365+
const content = [
366+
'---',
367+
'description: "test"',
368+
'---',
369+
' ```ts',
370+
' #tool:ignored',
371+
' ```',
372+
'#tool:visible',
373+
].join('\n');
374+
375+
const result = new PromptFileParser().parse(uri, content);
376+
assert.ok(result.body);
377+
assert.deepEqual(result.body.variableReferences.map(r => r.name), ['visible']);
378+
});
379+
380+
test('fenced code block with 4 backticks', async () => {
381+
const uri = URI.parse('file:///test/prompt-4tick.md');
382+
const content = [
383+
'---',
384+
'description: "test"',
385+
'---',
386+
'````',
387+
'#tool:ignored and [link](./ignored.md)',
388+
'````',
389+
'#tool:visible',
390+
].join('\n');
391+
392+
const result = new PromptFileParser().parse(uri, content);
393+
assert.ok(result.body);
394+
assert.deepEqual(result.body.variableReferences.map(r => r.name), ['visible']);
395+
assert.deepEqual(result.body.fileReferences, []);
396+
});
397+
398+
test('fenced code block with tilde fence (~~~)', async () => {
399+
const uri = URI.parse('file:///test/prompt-tilde.md');
400+
const content = [
401+
'---',
402+
'description: "test"',
403+
'---',
404+
'~~~',
405+
'#file:./ignored.md and [link](./ignored-link.md)',
406+
'#tool:ignored',
407+
'~~~',
408+
'[real](./real.md)',
409+
].join('\n');
410+
411+
const result = new PromptFileParser().parse(uri, content);
412+
assert.ok(result.body);
413+
assert.deepEqual(result.body.fileReferences.map(r => ({ content: r.content, isMarkdownLink: r.isMarkdownLink })), [
414+
{ content: './real.md', isMarkdownLink: true },
415+
]);
416+
assert.deepEqual(result.body.variableReferences, []);
417+
});
418+
242419

243420
test('agent with agents', async () => {
244421
const uri = URI.parse('file:///test/test.agent.md');

0 commit comments

Comments
 (0)