Skip to content

Commit 06bec76

Browse files
committed
handle complex inPage tool responses
Extract creating extra snapshot nodes into a separate function; format; address comments; move to McpPage.
1 parent 85b8993 commit 06bec76

6 files changed

Lines changed: 818 additions & 63 deletions

File tree

src/McpContext.ts

Lines changed: 187 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ import type {
3131
Target,
3232
Extension,
3333
} from './third_party/index.js';
34-
import type {DevTools} from './third_party/index.js';
35-
import {Locator} from './third_party/index.js';
34+
import type {DevTools, Protocol} from './third_party/index.js';
35+
import {Locator, type ElementHandle} from './third_party/index.js';
3636
import {PredefinedNetworkConditions} from './third_party/index.js';
3737
import {listPages} from './tools/pages.js';
3838
import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
@@ -691,6 +691,7 @@ export class McpContext implements Context {
691691
page: McpPage,
692692
verbose = false,
693693
devtoolsData: DevToolsData | undefined = undefined,
694+
extraHandles: ElementHandle[] = [],
694695
): Promise<void> {
695696
const rootNode = await page.pptrPage.accessibility.snapshot({
696697
includeIframes: true,
@@ -708,10 +709,13 @@ export class McpContext implements Context {
708709
let idCounter = 0;
709710
const idToNode = new Map<string, TextSnapshotNode>();
710711
const seenUniqueIds = new Set<string>();
712+
const seenBackendNodeIds = new Set<number>();
711713
const assignIds = (node: SerializedAXNode): TextSnapshotNode => {
712714
let id = '';
713-
// @ts-expect-error untyped loaderId & backendNodeId.
714-
const uniqueBackendId = `${node.loaderId}_${node.backendNodeId}`;
715+
// @ts-expect-error untyped backendNodeId.
716+
const backendNodeId: number = node.backendNodeId;
717+
// @ts-expect-error untyped loaderId.
718+
const uniqueBackendId = `${node.loaderId}_${backendNodeId}`;
715719
if (uniqueBackendNodeIdToMcpId.has(uniqueBackendId)) {
716720
// Re-use MCP exposed ID if the uniqueId is the same.
717721
id = uniqueBackendNodeIdToMcpId.get(uniqueBackendId)!;
@@ -721,6 +725,7 @@ export class McpContext implements Context {
721725
uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id);
722726
}
723727
seenUniqueIds.add(uniqueBackendId);
728+
seenBackendNodeIds.add(backendNodeId);
724729

725730
const nodeWithId: TextSnapshotNode = {
726731
...node,
@@ -744,6 +749,18 @@ export class McpContext implements Context {
744749
};
745750

746751
const rootNodeWithId = assignIds(rootNode);
752+
753+
await this.#insertExtraNodes(
754+
page,
755+
idToNode,
756+
seenUniqueIds,
757+
snapshotId,
758+
idCounter,
759+
rootNodeWithId,
760+
seenBackendNodeIds,
761+
extraHandles,
762+
);
763+
747764
const snapshot: TextSnapshot = {
748765
root: rootNodeWithId,
749766
snapshotId: String(snapshotId),
@@ -768,6 +785,172 @@ export class McpContext implements Context {
768785
}
769786
}
770787

788+
// ExtraHandles represent DOM nodes which might not be part of the accessibility tree, e.g. DOM nodes
789+
// returned by in-page tools. We insert them into the tree by finding the closest ancestor in the
790+
// tree and inserting the node as a child. The ancestor's child nodes are re-parented if necessary.
791+
async #insertExtraNodes(
792+
page: McpPage,
793+
idToNode: Map<string, TextSnapshotNode>,
794+
seenUniqueIds: Set<string>,
795+
snapshotId: number,
796+
idCounter: number,
797+
rootNodeWithId: TextSnapshotNode,
798+
seenBackendNodeIds: Set<number>,
799+
extraHandles: ElementHandle[],
800+
): Promise<void> {
801+
const {uniqueBackendNodeIdToMcpId} = page;
802+
803+
const createExtraNode = async (
804+
handle: ElementHandle,
805+
): Promise<TextSnapshotNode | null> => {
806+
const backendNodeId = await handle.backendNodeId();
807+
if (!backendNodeId || seenBackendNodeIds.has(backendNodeId)) {
808+
return null;
809+
}
810+
const uniqueBackendId = `custom_${backendNodeId}`;
811+
if (seenUniqueIds.has(uniqueBackendId)) {
812+
return null;
813+
}
814+
seenBackendNodeIds.add(backendNodeId);
815+
816+
let id = '';
817+
const mcpId = uniqueBackendNodeIdToMcpId.get(uniqueBackendId);
818+
if (mcpId !== undefined) {
819+
id = mcpId;
820+
} else {
821+
id = `${snapshotId}_${idCounter++}`;
822+
uniqueBackendNodeIdToMcpId.set(uniqueBackendId, id);
823+
}
824+
seenUniqueIds.add(uniqueBackendId);
825+
826+
const tagHandle = await handle.getProperty('localName');
827+
const tagValue = await tagHandle.jsonValue();
828+
const extraNode: TextSnapshotNode = {
829+
role: tagValue,
830+
id,
831+
backendNodeId,
832+
children: [],
833+
elementHandle: async () => handle,
834+
};
835+
return extraNode;
836+
};
837+
838+
const findAncestorNode = async (
839+
handle: ElementHandle,
840+
): Promise<TextSnapshotNode | null> => {
841+
let ancestorHandle = await handle.evaluateHandle(el => el.parentElement);
842+
843+
while (ancestorHandle) {
844+
const ancestorElement = ancestorHandle.asElement();
845+
if (!ancestorElement) {
846+
await ancestorHandle.dispose();
847+
return null;
848+
}
849+
850+
const ancestorBackendId = await ancestorElement.backendNodeId();
851+
if (ancestorBackendId) {
852+
const ancestorNode = idToNode
853+
.values()
854+
.find(node => node.backendNodeId === ancestorBackendId);
855+
if (ancestorNode) {
856+
await ancestorHandle.dispose();
857+
return ancestorNode;
858+
}
859+
}
860+
861+
const nextHandle = await ancestorElement.evaluateHandle(
862+
el => el.parentElement,
863+
);
864+
await ancestorHandle.dispose();
865+
ancestorHandle = nextHandle;
866+
}
867+
return null;
868+
};
869+
870+
const findDescendantNodes = async (
871+
backendNodeId: number,
872+
): Promise<Set<number>> => {
873+
const descendantIds = new Set<number>();
874+
try {
875+
// @ts-expect-error internal API
876+
const client = page.pptrPage._client();
877+
if (client) {
878+
const {node}: {node: Protocol.DOM.Node} = await client.send(
879+
'DOM.describeNode',
880+
{
881+
backendNodeId,
882+
depth: -1,
883+
pierce: true,
884+
},
885+
);
886+
const collect = (node: Protocol.DOM.Node) => {
887+
if (node.backendNodeId && node.backendNodeId !== backendNodeId) {
888+
descendantIds.add(node.backendNodeId);
889+
}
890+
if (node.children) {
891+
for (const child of node.children) {
892+
collect(child);
893+
}
894+
}
895+
};
896+
collect(node);
897+
}
898+
} catch (e) {
899+
this.logger(
900+
`Failed to collect descendants for backend node ${backendNodeId}`,
901+
e,
902+
);
903+
}
904+
return descendantIds;
905+
};
906+
907+
const moveChildNodes = (
908+
attachTarget: TextSnapshotNode,
909+
extraNode: TextSnapshotNode,
910+
descendantIds: Set<number>,
911+
): number => {
912+
let firstMovedIndex = -1;
913+
if (descendantIds.size > 0 && attachTarget.children) {
914+
const remainingChildren: TextSnapshotNode[] = [];
915+
for (const child of attachTarget.children) {
916+
if (child.backendNodeId && descendantIds.has(child.backendNodeId)) {
917+
if (firstMovedIndex === -1) {
918+
firstMovedIndex = remainingChildren.length;
919+
}
920+
extraNode.children.push(child);
921+
} else {
922+
remainingChildren.push(child);
923+
}
924+
}
925+
attachTarget.children = remainingChildren;
926+
}
927+
return firstMovedIndex !== -1
928+
? firstMovedIndex
929+
: attachTarget.children
930+
? attachTarget.children.length
931+
: 0;
932+
};
933+
934+
if (extraHandles.length) {
935+
page.extraHandles = extraHandles;
936+
}
937+
for (const handle of page.extraHandles) {
938+
const extraNode = await createExtraNode(handle);
939+
if (!extraNode) {
940+
continue;
941+
}
942+
idToNode.set(extraNode.id, extraNode);
943+
const attachTarget = (await findAncestorNode(handle)) || rootNodeWithId;
944+
if (extraNode.backendNodeId !== undefined) {
945+
const descendantIds = await findDescendantNodes(
946+
extraNode.backendNodeId,
947+
);
948+
const index = moveChildNodes(attachTarget, extraNode, descendantIds);
949+
attachTarget.children.splice(index, 0, extraNode);
950+
}
951+
}
952+
}
953+
771954
async saveTemporaryFile(
772955
data: Uint8Array<ArrayBufferLike>,
773956
filename: string,

0 commit comments

Comments
 (0)