Skip to content

Commit 78505fc

Browse files
committed
handle complex inPage tool responses
1 parent 0aff266 commit 78505fc

6 files changed

Lines changed: 759 additions & 21 deletions

File tree

src/McpContext.ts

Lines changed: 161 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import {
1616
type ListenerMap,
1717
type UncaughtError,
1818
} from './PageCollector.js';
19-
import type {DevTools} from './third_party/index.js';
19+
import type {DevTools, Protocol} from './third_party/index.js';
2020
import type {
2121
Browser,
2222
BrowserContext,
@@ -29,14 +29,15 @@ import type {
2929
Viewport,
3030
Target,
3131
} from './third_party/index.js';
32-
import {Locator} from './third_party/index.js';
32+
import {Locator, type ElementHandle} from './third_party/index.js';
3333
import {PredefinedNetworkConditions} from './third_party/index.js';
3434
import {listPages} from './tools/pages.js';
3535
import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
3636
import type {
3737
Context,
3838
DevToolsData,
3939
SupportedExtensions,
40+
ContextPage,
4041
} from './tools/ToolDefinition.js';
4142
import type {TraceResult} from './trace-processing/parse.js';
4243
import type {
@@ -78,7 +79,7 @@ export class McpContext implements Context {
7879
#extensionServiceWorkers: ExtensionServiceWorker[] = [];
7980

8081
#mcpPages = new Map<Page, McpPage>();
81-
#selectedPage?: McpPage;
82+
#selectedPage?: ContextPage;
8283
#networkCollector: NetworkCollector;
8384
#consoleCollector: ConsoleCollector;
8485
#devtoolsUniverseManager: UniverseManager;
@@ -163,7 +164,10 @@ export class McpContext implements Context {
163164
return context;
164165
}
165166

166-
resolveCdpRequestId(page: McpPage, cdpRequestId: string): number | undefined {
167+
resolveCdpRequestId(
168+
page: ContextPage,
169+
cdpRequestId: string,
170+
): number | undefined {
167171
if (!cdpRequestId) {
168172
this.logger('no network request');
169173
return;
@@ -180,14 +184,14 @@ export class McpContext implements Context {
180184
}
181185

182186
resolveCdpElementId(
183-
page: McpPage,
187+
page: ContextPage,
184188
cdpBackendNodeId: number,
185189
): string | undefined {
186190
if (!cdpBackendNodeId) {
187191
this.logger('no cdpBackendNodeId');
188192
return;
189193
}
190-
const snapshot = page.textSnapshot;
194+
const snapshot = page.getSnapshot();
191195
if (!snapshot) {
192196
this.logger('no text snapshot');
193197
return;
@@ -280,7 +284,7 @@ export class McpContext implements Context {
280284
return this.#networkCollector.getById(page.pptrPage, reqid);
281285
}
282286

283-
async restoreEmulation(page: McpPage) {
287+
async restoreEmulation(page: ContextPage) {
284288
const currentSetting = page.emulationSettings;
285289
await this.emulate(currentSetting, page.pptrPage);
286290
}
@@ -446,7 +450,7 @@ export class McpContext implements Context {
446450
return this.#selectedPage?.pptrPage === page;
447451
}
448452

449-
selectPage(newPage: McpPage): void {
453+
selectPage(newPage: ContextPage): void {
450454
this.#selectedPage = newPage;
451455
this.#updateSelectedPageTimeouts();
452456
}
@@ -679,7 +683,7 @@ export class McpContext implements Context {
679683
return this.#mcpPages.get(page)?.devToolsPage;
680684
}
681685

682-
async getDevToolsData(page: McpPage): Promise<DevToolsData> {
686+
async getDevToolsData(page: ContextPage): Promise<DevToolsData> {
683687
try {
684688
this.logger('Getting DevTools UI data');
685689
const devtoolsPage = this.getDevToolsPage(page.pptrPage);
@@ -716,9 +720,10 @@ export class McpContext implements Context {
716720
* Creates a text snapshot of a page.
717721
*/
718722
async createTextSnapshot(
719-
page: McpPage,
723+
page: ContextPage,
720724
verbose = false,
721725
devtoolsData: DevToolsData | undefined = undefined,
726+
extraHandles?: ElementHandle[],
722727
): Promise<void> {
723728
const rootNode = await page.pptrPage.accessibility.snapshot({
724729
includeIframes: true,
@@ -772,14 +777,159 @@ export class McpContext implements Context {
772777
};
773778

774779
const rootNodeWithId = assignIds(rootNode);
780+
781+
/**
 * Builds a snapshot node for an extra element handle.
 *
 * The element is keyed as `custom_<backendNodeId>`. If that key already has
 * an MCP id in `uniqueBackendNodeIdToMcpId` the id is reused; otherwise a new
 * `<snapshotId>_<counter>` id is minted and stored there. The node's `role`
 * is set to the element's `localName`.
 *
 * @param handle - Element to represent in the snapshot.
 * @returns The new node, or `null` when the handle has no backend node id or
 * its key is already present in `seenUniqueIds`.
 */
const createExtraNode = async (
  handle: ElementHandle,
): Promise<TextSnapshotNode | null> => {
  const backendNodeId = await handle.backendNodeId();
  if (!backendNodeId) {
    return null;
  }
  const uniqueBackendId = `custom_${backendNodeId}`;
  if (seenUniqueIds.has(uniqueBackendId)) {
    return null;
  }

  // Single lookup instead of has() followed by a non-null asserted get().
  let id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId);
  if (id === undefined) {
    id = `${snapshotId}_${idCounter++}`;
    uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id);
  }
  seenUniqueIds.add(uniqueBackendId);

  // Read the tag name in one evaluate call. The previous
  // getProperty()/jsonValue() pair created an intermediate JSHandle that was
  // never disposed and needed a second round trip.
  const tagValue = await handle.evaluate(el => el.localName);

  return {
    role: tagValue,
    id,
    backendNodeId,
    children: [],
    elementHandle: async () => handle,
  };
};
813+
814+
const findAncestorNode = async (
815+
handle: ElementHandle,
816+
): Promise<TextSnapshotNode | null> => {
817+
let ancestorHandle = await handle.evaluateHandle(el => el.parentElement);
818+
819+
while (ancestorHandle) {
820+
const ancestorElement = ancestorHandle.asElement();
821+
if (!ancestorElement) {
822+
await ancestorHandle.dispose();
823+
return null;
824+
}
825+
826+
const ancestorBackendId = await ancestorElement.backendNodeId();
827+
if (ancestorBackendId) {
828+
const ancestorNode = idToNode
829+
.values()
830+
.find(node => node.backendNodeId === ancestorBackendId);
831+
if (ancestorNode) {
832+
await ancestorHandle.dispose();
833+
return ancestorNode;
834+
}
835+
}
836+
837+
const nextHandle = await ancestorElement.evaluateHandle(
838+
el => el.parentElement,
839+
);
840+
await ancestorHandle.dispose();
841+
ancestorHandle = nextHandle;
842+
}
843+
return null;
844+
};
845+
846+
const findDescendantNodes = async (
847+
backendNodeId: number,
848+
): Promise<Set<number>> => {
849+
const descendantIds = new Set<number>();
850+
try {
851+
// @ts-expect-error internal API
852+
const client = page.pptrPage._client();
853+
if (client) {
854+
const {node}: {node: Protocol.DOM.Node} = await client.send(
855+
'DOM.describeNode',
856+
{
857+
backendNodeId,
858+
depth: -1,
859+
pierce: true,
860+
},
861+
);
862+
const collect = (node: Protocol.DOM.Node) => {
863+
if (node.backendNodeId && node.backendNodeId !== backendNodeId) {
864+
descendantIds.add(node.backendNodeId);
865+
}
866+
if (node.children) {
867+
for (const child of node.children) {
868+
collect(child);
869+
}
870+
}
871+
};
872+
collect(node);
873+
}
874+
} catch (e) {
875+
this.logger(
876+
`Failed to collect descendants for backend node ${backendNodeId}`,
877+
e,
878+
);
879+
}
880+
return descendantIds;
881+
};
882+
883+
const moveChildNodes = (
884+
attachTarget: TextSnapshotNode,
885+
extraNode: TextSnapshotNode,
886+
descendantIds: Set<number>,
887+
): number => {
888+
let firstMovedIndex = -1;
889+
if (descendantIds.size > 0 && attachTarget.children) {
890+
const remainingChildren: TextSnapshotNode[] = [];
891+
for (const child of attachTarget.children) {
892+
if (child.backendNodeId && descendantIds.has(child.backendNodeId)) {
893+
if (firstMovedIndex === -1) {
894+
firstMovedIndex = remainingChildren.length;
895+
}
896+
extraNode.children.push(child);
897+
} else {
898+
remainingChildren.push(child);
899+
}
900+
}
901+
attachTarget.children = remainingChildren;
902+
}
903+
return firstMovedIndex !== -1
904+
? firstMovedIndex
905+
: attachTarget.children
906+
? attachTarget.children.length
907+
: 0;
908+
};
909+
910+
if (extraHandles) {
911+
page.setExtraHandles(extraHandles);
912+
}
913+
for (const handle of page.getExtraHandles() ?? []) {
914+
const extraNode = await createExtraNode(handle);
915+
if (!extraNode) {
916+
continue;
917+
}
918+
idToNode.set(extraNode.id, extraNode);
919+
const attachTarget = (await findAncestorNode(handle)) || rootNodeWithId;
920+
const descendantIds = await findDescendantNodes(extraNode.backendNodeId!);
921+
const index = moveChildNodes(attachTarget, extraNode, descendantIds);
922+
attachTarget.children.splice(index, 0, extraNode);
923+
}
924+
775925
const snapshot: TextSnapshot = {
776926
root: rootNodeWithId,
777927
snapshotId: String(snapshotId),
778928
idToNode,
779929
hasSelectedElement: false,
780930
verbose,
781931
};
782-
page.textSnapshot = snapshot;
932+
page.setSnapshot(snapshot);
783933
const data = devtoolsData ?? (await this.getDevToolsData(page));
784934
if (data?.cdpBackendNodeId) {
785935
snapshot.hasSelectedElement = true;

src/McpPage.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export class McpPage implements ContextPage {
4040
// Snapshot
4141
textSnapshot: TextSnapshot | null = null;
4242
uniqueBackendNodeIdToMcpId = new Map<string, string>();
43+
extraHandles?: ElementHandle[];
4344

4445
// Emulation
4546
emulationSettings: EmulationSettings = {};
@@ -164,4 +165,20 @@ export class McpPage implements ContextPage {
164165
getAXNodeByUid(uid: string) {
165166
return this.textSnapshot?.idToNode.get(uid);
166167
}
168+
169+
/** Returns the text snapshot stored in `textSnapshot`, or `null` when none is stored. */
getSnapshot(): TextSnapshot | null {
170+
return this.textSnapshot;
171+
}
172+
173+
/** Replaces the stored text snapshot with `snapshot`. */
setSnapshot(snapshot: TextSnapshot): void {
174+
this.textSnapshot = snapshot;
175+
}
176+
177+
/** Returns the stored extra element handles, or `undefined` when none have been set. */
getExtraHandles(): ElementHandle[] | undefined {
178+
return this.extraHandles;
179+
}
180+
181+
/** Stores `extraHandles` on the page, replacing any previously stored list. */
setExtraHandles(extraHandles: ElementHandle[]): void {
182+
this.extraHandles = extraHandles;
183+
}
167184
}

src/tools/ToolDefinition.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ import type {
1919
TextSnapshotNode,
2020
GeolocationOptions,
2121
ExtensionServiceWorker,
22+
TextSnapshot,
23+
EmulationSettings,
2224
} from '../types.js';
2325
import type {InstalledExtension} from '../utils/ExtensionRegistry.js';
2426
import type {PaginationOptions} from '../utils/types.js';
@@ -208,6 +210,16 @@ export type Context = Readonly<{
208210
triggerExtensionAction(id: string): Promise<void>;
209211
listExtensions(): InstalledExtension[];
210212
getExtension(id: string): InstalledExtension | undefined;
213+
resolveCdpElementId(
214+
page: ContextPage,
215+
cdpBackendNodeId: number,
216+
): string | undefined;
217+
createTextSnapshot(
218+
page: ContextPage,
219+
verbose: boolean,
220+
devtoolsData: DevToolsData | undefined,
221+
extraHandles?: ElementHandle[],
222+
): Promise<void>;
211223
getSelectedMcpPage(): McpPage;
212224
getExtensionServiceWorkers(): ExtensionServiceWorker[];
213225
getExtensionServiceWorkerId(
@@ -227,6 +239,12 @@ export type ContextPage = Readonly<{
227239
options?: {timeout?: number},
228240
): Promise<void>;
229241
getInPageTools(): ToolGroup<InPageToolDefinition> | undefined;
242+
getSnapshot(): TextSnapshot | null;
243+
setSnapshot(snapshot: TextSnapshot): void;
244+
getExtraHandles(): ElementHandle[] | undefined;
245+
setExtraHandles(extraHandles: ElementHandle[]): void;
246+
readonly uniqueBackendNodeIdToMcpId: Map<string, string>;
247+
readonly emulationSettings: EmulationSettings;
230248
}>;
231249

232250
export function defineTool<Schema extends zod.ZodRawShape>(

0 commit comments

Comments
 (0)