From da2e5d1314b7504877fd50090e6a4b47f44fb9f6 Mon Sep 17 00:00:00 2001 From: Miguel <36487034+miguelg719@users.noreply.github.com> Date: Mon, 3 Feb 2025 20:29:55 -0800 Subject: [PATCH] A11y empty xpath fix (#458) * js function declared as string * fixed a11y missing roles, improved xpath gen for text nodes * added changeset --- .changeset/gentle-pans-mix.md | 5 +++ lib/a11y/utils.ts | 62 ++++++++++++++++++++++++---------- lib/handlers/observeHandler.ts | 37 ++++++++++++++++++-- 3 files changed, 83 insertions(+), 21 deletions(-) create mode 100644 .changeset/gentle-pans-mix.md diff --git a/.changeset/gentle-pans-mix.md b/.changeset/gentle-pans-mix.md new file mode 100644 index 00000000..92ff30f1 --- /dev/null +++ b/.changeset/gentle-pans-mix.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Updated getAccessibilityTree() to make sure it doesn't skip useful nodes. Improved getXPathByResolvedObjectId() to account for text nodes and not skip generation diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index d3fa88e0..e39ec6f9 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -33,6 +33,11 @@ export function formatSimplifiedTree( function cleanStructuralNodes( node: AccessibilityNode, ): AccessibilityNode | null { + // Filter out nodes with negative IDs + if (node.nodeId && parseInt(node.nodeId) < 0) { + return null; + } + // Base case: leaf node if (!node.children) { return node.role === "generic" || node.role === "none" ? null : node; @@ -181,33 +186,54 @@ export async function getAccessibilityTree( // This function is wrapped into a string and sent as a CDP command // It is not meant to be actually executed here -const functionString = `function getNodePath(el) { - if (!el || el.nodeType !== Node.ELEMENT_NODE) return ""; - const pathSegments = []; +const functionString = ` +function getNodePath(el) { + if (!el || (el.nodeType !== Node.ELEMENT_NODE && el.nodeType !== Node.TEXT_NODE)) { + console.log("el is not a valid node type"); + return ""; + } + + const parts = []; let current = el; - while (current && current.nodeType === Node.ELEMENT_NODE) { - const tagName = current.nodeName.toLowerCase(); - let index = 1; - let sibling = current.previousSibling; - while (sibling) { + + while (current && (current.nodeType === Node.ELEMENT_NODE || current.nodeType === Node.TEXT_NODE)) { + let index = 0; + let hasSameTypeSiblings = false; + const siblings = current.parentElement + ? Array.from(current.parentElement.childNodes) + : []; + + for (let i = 0; i < siblings.length; i++) { + const sibling = siblings[i]; if ( - sibling.nodeType === Node.ELEMENT_NODE && - sibling.nodeName.toLowerCase() === tagName + sibling.nodeType === current.nodeType && + sibling.nodeName === current.nodeName ) { - index++; + index = index + 1; + hasSameTypeSiblings = true; + if (sibling.isSameNode(current)) { + break; + } } - sibling = sibling.previousSibling; } - const segment = index > 1 ? tagName + "[" + index + "]" : tagName; - pathSegments.unshift(segment); - current = current.parentNode; + if (!current || !current.parentNode) break; - if (current.nodeName.toLowerCase() === "html") { - pathSegments.unshift("html"); + if (current.nodeName.toLowerCase() === "html"){ + parts.unshift("html"); break; } + + // text nodes are handled differently in XPath + if (current.nodeName !== "#text") { + const tagName = current.nodeName.toLowerCase(); + const pathIndex = hasSameTypeSiblings ? \`[\${index}]\` : ""; + parts.unshift(\`\${tagName}\${pathIndex}\`); + } + + current = current.parentElement; } - return "/" + pathSegments.join("/"); + + return parts.length ? \`/\${parts.join("/")}\` : ""; }`; export async function getXPathByResolvedObjectId( diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index 89d09214..9f73caa3 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -114,15 +114,46 @@ export class StagehandObserveHandler { if (useAccessibilityTree) { // Generate xpath for the given element if not found in selectorMap + this.logger({ + category: "observation", + message: "Getting xpath for element", + level: 1, + auxiliary: { + elementId: { + value: elementId.toString(), + type: "string", + }, + }, + }); + + const args = { backendNodeId: elementId }; const { object } = await this.stagehandPage.sendCDP<{ object: { objectId: string }; - }>("DOM.resolveNode", { - backendNodeId: elementId, - }); + }>("DOM.resolveNode", args); + + if (!object || !object.objectId) { + this.logger({ + category: "observation", + message: `Invalid object ID returned for element: ${elementId}`, + level: 1, + }); + return null; + } + const xpath = await getXPathByResolvedObjectId( await this.stagehandPage.getCDPClient(), object.objectId, ); + + if (!xpath || xpath === "") { + this.logger({ + category: "observation", + message: `Empty xpath returned for element: ${elementId}`, + level: 1, + }); + return null; + } + return { ...rest, selector: `xpath=${xpath}`,