Skip to content

Commit 1bd10d3

Browse files
authored
[BUG-259] Web Xpath Fixes (#685)
* Refactor text extraction logic in hasOwnText function and update output format in page map generation
* Add XPath generation to page map extraction for interactive elements
* Refactor getXPath function to return full absolute XPath for deterministic page_map_json and enhance attribute collection for action elements
1 parent cbc50d5 commit 1bd10d3

1 file changed

Lines changed: 8 additions & 24 deletions

File tree

Apps/Web/aiplugin/page_map_extractor.js

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,28 +18,8 @@ function extractPageMapData() {
1818

1919
// ── XPath generator ──────────────────────────────────────────────────────
2020
function getXPath(el) {
21-
if (el.id) return `//*[@id="${el.id}"]`;
22-
const stableAttrs = ['name', 'data-testid', 'aria-label', 'placeholder'];
23-
for (const attr of stableAttrs) {
24-
const val = el.getAttribute(attr);
25-
if (val) {
26-
const tag = el.tagName.toLowerCase();
27-
try {
28-
if (document.querySelectorAll(`${tag}[${attr}="${CSS.escape(val)}"]`).length === 1)
29-
return `//${tag}[@${attr}="${val}"]`;
30-
} catch (e) {
31-
// Ignore escaping errors
32-
}
33-
}
34-
}
35-
if (['BUTTON', 'A'].includes(el.tagName)) {
36-
const txt = el.innerText.trim().slice(0, 60);
37-
if (txt) {
38-
const tag = el.tagName.toLowerCase();
39-
const hits = [...document.querySelectorAll(tag)].filter(e => e.innerText.trim().startsWith(txt));
40-
if (hits.length === 1) return `//${tag}[normalize-space()="${txt}"]`;
41-
}
42-
}
21+
// Always return full absolute xpath from document root.
22+
// This keeps page_map_json xpaths deterministic and directly resolvable later.
4323
function pos(e) {
4424
const tag = e.tagName.toLowerCase();
4525
const sibs = [...e.parentNode.children].filter(c => c.tagName === e.tagName);
@@ -98,7 +78,7 @@ function extractPageMapData() {
9878
}
9979

10080
function hasOwnText(el) {
101-
const full = norm(el.innerText, 200);
81+
const full = norm(getDirectText(el), 200);
10282
if (!full || full.length < 3) return false;
10383
if (actionEls.has(el)) return false;
10484
const nested = [...el.querySelectorAll(ACTION_SELECTOR)];
@@ -121,6 +101,9 @@ function extractPageMapData() {
121101
_type: 'action',
122102
_el: el,
123103
_order: domOrderMap.get(el) || 0,
104+
attributes: Object.fromEntries(
105+
[...el.attributes].map(attr => [attr.name, attr.value])
106+
),
124107
tag: el.tagName.toLowerCase(),
125108
type: el.getAttribute('type') || null,
126109
role: el.getAttribute('role') || el.tagName.toLowerCase(),
@@ -161,6 +144,7 @@ function extractPageMapData() {
161144
kind: isHeading ? 'heading' : (el.getAttribute('role') || tag),
162145
text,
163146
in_viewport: true,
147+
xpath: getXPath(el),
164148
});
165149
});
166150

@@ -179,7 +163,7 @@ function extractPageMapData() {
179163
if (node.node_type === "text") {
180164
const kind = (node.kind || "text").toUpperCase();
181165
const vp = node.in_viewport ? "👁" : "↕";
182-
lines.push(` ${vp} [${kind}] "${node.text}"`);
166+
lines.push(` ${vp} [${node.idx}] [${kind}] "${node.text}"`);
183167
} else {
184168
const parts = [`[${node.idx}]`, (node.role || '').toUpperCase()];
185169
if (node.label) parts.push(`label='${node.label}'`);

0 commit comments

Comments
 (0)