From cddf406ec7f13344f3c37824b3a4457ea045b5ad Mon Sep 17 00:00:00 2001 From: jeremy Date: Mon, 8 Jun 2026 21:27:48 -0700 Subject: [PATCH 1/4] fix(computer): declare all input fields in schema + fix element_at reserved word The computer tool implements every action and ComputerInput field, but parameters_schema() (progressive disclosure) only declared a subset of properties. Models emit only parameters present in the tool's JSON schema, so the undeclared fields could never be sent, making 8 actions impossible to invoke despite being fully implemented: - scroll (needs dx/dy) - drag (needs to_x/to_y) - resize_window (needs w/h) - select_menu (needs menu_path) - window_screenshot (needs window_id) - wait_for (needs contains, timeout_ms) - set_brightness (needs level) - perform_action (needs ax_action) - ocr region variant(needs region) Declare all input fields in the schema. Action *specs* stay in `discover` (progressive disclosure), but the *fields* must be declared or the action is dead on arrival. Trim field descriptions so the always-on schema stays small (~880 tokens) and bump the schema_is_compact bound accordingly. Also fix ax.rs::element_at: the hit-test AppleScript assigned to a local named `result`, which is reserved in AppleScript (holds the last command's value), so element_at always failed with "The variable result is not defined." Rename to `bestHit`. Add a schema_declares_every_input_field regression test so this class of bug (handler requires a field the schema never exposes) can't recur. Fixes #350. --- crates/jcode-app-core/src/tool/computer/ax.rs | 8 +++--- .../jcode-app-core/src/tool/computer/mod.rs | 15 ++++++++++- .../jcode-app-core/src/tool/computer/tests.rs | 25 ++++++++++++++++--- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/crates/jcode-app-core/src/tool/computer/ax.rs b/crates/jcode-app-core/src/tool/computer/ax.rs index d1ae8c2e1..34cd1e737 100644 --- a/crates/jcode-app-core/src/tool/computer/ax.rs +++ b/crates/jcode-app-core/src/tool/computer/ax.rs @@ -295,7 +295,7 @@ pub fn element_at(app: &str, x: f64, y: f64) -> Result { r#" using terms from application "System Events" on hit(el, px, py, idxPath, best) - set result to best + set bestHit to best try set p to position of el set sz to size of el @@ -312,17 +312,17 @@ using terms from application "System Events" try set t to (title of el as text) end try - set result to idxPath & " " & r & " \"" & t & "\" @(" & x1 & "," & y1 & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")" + set bestHit to idxPath & " " & r & " \"" & t & "\" @(" & x1 & "," & y1 & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")" end if end try try set i to 0 repeat with child in (UI elements of el) set i to i + 1 - set result to my hit(child, px, py, idxPath & "." & i, result) + set bestHit to my hit(child, px, py, idxPath & "." & i, bestHit) end repeat end try - return result + return bestHit end hit end using terms from diff --git a/crates/jcode-app-core/src/tool/computer/mod.rs b/crates/jcode-app-core/src/tool/computer/mod.rs index 4557a89e0..5392d340a 100644 --- a/crates/jcode-app-core/src/tool/computer/mod.rs +++ b/crates/jcode-app-core/src/tool/computer/mod.rs @@ -226,7 +226,20 @@ impl Tool for ComputerTool { }, "script": { "type": "string", "description": "AppleScript (run_applescript) or JS (run_jxa) source." }, "depth": { "type": "integer", "description": "Max AX tree depth for ui/find_element (default 12)." }, - "dry_run": { "type": "boolean", "description": "For mutating actions: report the intended action without performing it." } + "to_x": { "type": "number" }, + "to_y": { "type": "number" }, + "dx": { "type": "integer" }, + "dy": { "type": "integer" }, + "w": { "type": "number" }, + "h": { "type": "number" }, + "window_id": { "type": "integer", "description": "window_screenshot id (from list_windows)." }, + "menu_path": { "type": "array", "items": { "type": "string" }, "description": "select_menu path, e.g. [\"File\",\"Save\"]." }, + "ax_action": { "type": "string", "description": "perform_action AX action, e.g. AXShowMenu." }, + "contains": { "type": "string", "description": "wait_for: substring to await." }, + "timeout_ms": { "type": "integer" }, + "region": { "type": "array", "items": { "type": "number" }, "description": "ocr region [x,y,w,h]; omit for full screen." }, + "level": { "type": "number", "description": "set_brightness 0..1." }, + "dry_run": { "type": "boolean", "description": "Mutating actions: report intended action without doing it." } } }) } diff --git a/crates/jcode-app-core/src/tool/computer/tests.rs b/crates/jcode-app-core/src/tool/computer/tests.rs index 4241d1dad..3780ea081 100644 --- a/crates/jcode-app-core/src/tool/computer/tests.rs +++ b/crates/jcode-app-core/src/tool/computer/tests.rs @@ -97,16 +97,35 @@ fn is_mutating_classifies() { assert!(!super::is_mutating("discover")); } +#[test] +fn schema_declares_every_input_field() { + // Regression guard for the "schema omits half its fields" bug: every field + // `dispatch` can require must be declared in `parameters_schema()`, or the + // model can never send it and the action is dead on arrival. + let tool = ComputerTool::new(); + let schema = tool.parameters_schema(); + let props = schema["properties"].as_object().expect("properties object"); + for field in [ + "action", "category", "x", "y", "to_x", "to_y", "w", "h", "text", "keys", "dx", "dy", + "depth", "app", "role", "title", "value", "element", "ax_action", "menu_path", + "window_id", "script", "contains", "timeout_ms", "region", "level", "dry_run", + ] { + assert!(props.contains_key(field), "schema is missing field `{field}`"); + } +} + #[test] fn schema_is_compact() { // Guard against context bloat: the always-on schema + description must stay - // small. Measured well under this bound; alert if it balloons. + // small. Action *specs* live in `discover` (progressive disclosure), but + // every input *field* must be declared here or the model can't send it, so + // the field set (not the action set) sets the floor. Keep always-on cost + // roughly under ~900 tokens; alert if it balloons past that. let tool = ComputerTool::new(); let schema = serde_json::to_string(&tool.parameters_schema()).unwrap(); let total = tool.description().len() + schema.len(); - // ~4 chars/token; keep always-on cost roughly under ~700 tokens. assert!( - total < 2800, + total < 3500, "computer tool always-on size grew to {total} chars (~{} tokens)", total / 4 ); From 9e49dfafe9af0c18f9f06c77c45632d1a0b8f72f Mon Sep 17 00:00:00 2001 From: jeremy Date: Mon, 8 Jun 2026 21:47:09 -0700 Subject: [PATCH 2/4] fix(computer): prune element_at hit-test + correct osa error classification Two production-quality issues found during exhaustive live testing: 1) element_at walked the ENTIRE AX tree, so on large apps (System Settings) it blew past the scripting timeout (~20s) and failed. A child's AX frame is contained in its parent's, so a parent that doesn't contain the point can't have a matching descendant: prune to only descend into subtrees whose frame contains the point, add a depth bound, and give it an explicit 12s timeout. This turns a full-tree walk into a path-length walk. 2) osascript errors -1719 (errAEIllegalIndex / "Invalid index") were misclassified as "Accessibility permission required", sending users to grant a permission they already had when the real cause was "app not running / no front window / AX path out of range". Extract the mapping into a pure `classify_osa_error`, check permission text first, then map -1719/-1728/"invalid index" to an accurate "target not found" message. Use -25211 (errAXAPIDisabled) for the genuine permission code. Add unit tests covering each branch. Refs #350. --- crates/jcode-app-core/src/tool/computer/ax.rs | 52 ++++++++----- .../jcode-app-core/src/tool/computer/osa.rs | 77 +++++++++++++++++-- 2 files changed, 104 insertions(+), 25 deletions(-) diff --git a/crates/jcode-app-core/src/tool/computer/ax.rs b/crates/jcode-app-core/src/tool/computer/ax.rs index 34cd1e737..653c42422 100644 --- a/crates/jcode-app-core/src/tool/computer/ax.rs +++ b/crates/jcode-app-core/src/tool/computer/ax.rs @@ -289,37 +289,51 @@ pub fn select_menu(app: &str, path: &[String]) -> Result { /// Return the element at a screen point (role/title), useful to confirm targets. pub fn element_at(app: &str, x: f64, y: f64) -> Result { - // System Events can hit-test via "UI element N" is awkward; use AX position - // matching by walking and finding the deepest element containing the point. + // Hit-test by walking the AX tree, but PRUNE: only descend into a subtree + // whose own frame contains the point. A child's frame is contained in its + // parent's, so a parent that doesn't contain the point can't have a matching + // descendant. This turns a full-tree walk (which times out on huge apps like + // System Settings) into a path-length walk. Also bound the depth defensively. let script = format!( r#" using terms from application "System Events" - on hit(el, px, py, idxPath, best) - set bestHit to best + on inFrame(el, px, py) try set p to position of el set sz to size of el set x1 to item 1 of p set y1 to item 2 of p - set x2 to x1 + (item 1 of sz) - set y2 to y1 + (item 2 of sz) - if px >= x1 and px <= x2 and py >= y1 and py <= y2 then - set r to "" - try - set r to (role of el as text) - end try - set t to "" - try - set t to (title of el as text) - end try - set bestHit to idxPath & " " & r & " \"" & t & "\" @(" & x1 & "," & y1 & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")" + if px >= x1 and px <= (x1 + (item 1 of sz)) and py >= y1 and py <= (y1 + (item 2 of sz)) then + return true end if end try + return false + end inFrame + on hit(el, px, py, idxPath, maxlvl, lvl, best) + set bestHit to best + if lvl > maxlvl then return bestHit + -- Record this element if it contains the point (deepest wins). + if my inFrame(el, px, py) then + set r to "" + try + set r to (role of el as text) + end try + set t to "" + try + set t to (title of el as text) + end try + set p to position of el + set sz to size of el + set bestHit to idxPath & " " & r & " \"" & t & "\" @(" & (item 1 of p) & "," & (item 2 of p) & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")" + end if + -- Only descend into children that themselves contain the point. try set i to 0 repeat with child in (UI elements of el) set i to i + 1 - set bestHit to my hit(child, px, py, idxPath & "." & i, bestHit) + if my inFrame(child, px, py) then + set bestHit to my hit(child, px, py, idxPath & "." & i, maxlvl, lvl + 1, bestHit) + end if end repeat end try return bestHit @@ -330,7 +344,7 @@ tell application "System Events" set frontApp to first application process whose name is {app} try set win to front window of frontApp - set out to my hit(win, {x}, {y}, "", "(none)") + set out to my hit(win, {x}, {y}, "", 40, 0, "(none)") on error errMsg set out to "(error: " & errMsg & ")" end try @@ -341,7 +355,7 @@ end tell x = x, y = y ); - let res = osa::run_applescript(&script)?; + let res = osa::run_applescript_timeout(&script, Duration::from_secs(12))?; Ok(ToolOutput::new(format!("Deepest element at ({x:.0},{y:.0}) in {app}:\n{res}")) .with_title("element_at") .with_metadata(json!({"app": app, "x": x, "y": y}))) diff --git a/crates/jcode-app-core/src/tool/computer/osa.rs b/crates/jcode-app-core/src/tool/computer/osa.rs index 372876a88..66e7b855e 100644 --- a/crates/jcode-app-core/src/tool/computer/osa.rs +++ b/crates/jcode-app-core/src/tool/computer/osa.rs @@ -41,30 +41,50 @@ fn run(args: &[&str], lang: &str, timeout: Duration) -> Result { return Ok(stdout.trim_end().to_string()); } - let trimmed = stderr.trim(); + bail!("{}", classify_osa_error(stderr.trim(), lang)); +} + +/// Map an osascript stderr message to an actionable error string. Pure so it can +/// be unit-tested without shelling out. Ordering matters: permission errors are +/// the most actionable and have distinctive text, so they are checked before the +/// generic "reference does not resolve" index errors. +fn classify_osa_error(trimmed: &str, lang: &str) -> String { let lower = trimmed.to_lowercase(); + // Permission errors first. The real "not allowed assistive access" denial + // always carries that phrase; -25211 is errAXAPIDisabled. if lower.contains("assistive") || lower.contains("not allowed") - || lower.contains("-1719") + || lower.contains("-25211") || lower.contains("1002") { - bail!( + return format!( "Accessibility permission required. Run the `setup` action, or grant it in \ System Settings > Privacy & Security > Accessibility for your terminal/jcode. \ ({trimmed})" ); } if lower.contains("-1743") || lower.contains("not authorized to send apple events") { - bail!( + return format!( "Automation permission required for the target app. Approve the prompt, or grant it \ in System Settings > Privacy & Security > Automation. ({trimmed})" ); } + // -1719 (errAEIllegalIndex / "Invalid index") and -1728 (errAENoSuchObject / + // "Can't get ...") mean the target reference does not resolve: the app isn't + // running, has no front window, or an AX path index is out of range. This is + // NOT a permission problem, so report it accurately instead of sending the + // user to grant Accessibility (the previous behavior, which was misleading). + if lower.contains("-1719") || lower.contains("-1728") || lower.contains("invalid index") { + return format!( + "target not found: the app may not be running, has no front window, or an AX path \ + index is out of range. Check `list_apps`/`ui` and retry. ({trimmed})" + ); + } if trimmed.is_empty() { - bail!("{lang} failed (no error output)"); + return format!("{lang} failed (no error output)"); } - bail!("{lang} failed: {trimmed}"); + format!("{lang} failed: {trimmed}") } /// Run a command with a wall-clock timeout. Returns (success, stdout, stderr). @@ -154,4 +174,49 @@ mod tests { .to_string(); assert!(err.contains("timed out"), "got: {err}"); } + + #[test] + fn classifies_permission_error() { + let msg = classify_osa_error( + "execution error: System Events got an error: osascript is not allowed assistive access. (-25211)", + "AppleScript", + ); + assert!(msg.contains("Accessibility permission required"), "got: {msg}"); + } + + #[test] + fn classifies_invalid_index_as_not_found_not_permission() { + // Regression: -1719 used to be misreported as "Accessibility permission + // required" even though it means the target reference didn't resolve. + let msg = classify_osa_error( + "execution error: System Events got an error: Can\u{2019}t get application process 1 whose name = \"Codex\". Invalid index. (-1719)", + "AppleScript", + ); + assert!(msg.contains("target not found"), "got: {msg}"); + assert!(!msg.contains("Accessibility permission"), "got: {msg}"); + } + + #[test] + fn classifies_no_such_object_as_not_found() { + let msg = classify_osa_error( + "execution error: System Events got an error: Can\u{2019}t get front window of process \"Foo\". (-1728)", + "AppleScript", + ); + assert!(msg.contains("target not found"), "got: {msg}"); + } + + #[test] + fn classifies_automation_error() { + let msg = classify_osa_error( + "execution error: Not authorized to send Apple events to Finder. (-1743)", + "AppleScript", + ); + assert!(msg.contains("Automation permission required"), "got: {msg}"); + } + + #[test] + fn classifies_generic_error() { + let msg = classify_osa_error("execution error: something weird (-2700)", "JXA"); + assert!(msg.contains("JXA failed"), "got: {msg}"); + } } From fde76a1e1b8615be87ccb90e365433f2cec8b246 Mon Sep 17 00:00:00 2001 From: jeremy Date: Mon, 8 Jun 2026 21:52:54 -0700 Subject: [PATCH 3/4] fix(computer): make wait_for honor its own timeout_ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each wait_for poll dumped the AX tree (depth 10) under run_applescript's 20s default timeout. On a large app a single poll could take ~20s, so a wait_for with timeout_ms=4000 didn't return until ~20s — it ignored its own deadline. Bound each poll's scripting timeout to the time remaining (capped at 3s) and shrink the dump depth to 6 so polls are cheap and the call honors timeout_ms. Refs #350. --- .../jcode-app-core/src/tool/computer/sys.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/crates/jcode-app-core/src/tool/computer/sys.rs b/crates/jcode-app-core/src/tool/computer/sys.rs index d1f091b96..4de8ce225 100644 --- a/crates/jcode-app-core/src/tool/computer/sys.rs +++ b/crates/jcode-app-core/src/tool/computer/sys.rs @@ -67,7 +67,11 @@ pub fn notify(text: &str, title: Option<&str>) -> Result { /// Poll an app's AX tree until a substring appears (element_appears) or a /// timeout elapses. Cheap structural wait instead of fixed sleeps. pub fn wait_for(app: &str, contains: &str, timeout_ms: u64) -> Result { - let deadline = Instant::now() + Duration::from_millis(timeout_ms.min(60_000)); + let total = Duration::from_millis(timeout_ms.min(60_000)); + let deadline = Instant::now() + total; + // Keep each poll shallow so it returns quickly even on big apps; a deep + // (depth-10) dump of a large tree can itself take many seconds, which would + // blow past `timeout_ms`. Depth 6 captures visible labels/values cheaply. let script = format!( r#" using terms from application "System Events" @@ -91,7 +95,7 @@ end using terms from tell application "System Events" set frontApp to first application process whose name is {app} try - return my dumpEl(front window of frontApp, 0, 10) + return my dumpEl(front window of frontApp, 0, 6) on error return "" end try @@ -100,14 +104,21 @@ end tell app = osa::as_quote(app) ); loop { - let tree = osa::run_applescript(&script).unwrap_or_default(); + // Bound each poll by the time left so the whole call honors timeout_ms, + // and never let a single poll exceed ~3s. + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + bail!("wait_for timed out after {timeout_ms}ms (no '{contains}' in {app})"); + } + let poll_budget = remaining.min(Duration::from_secs(3)); + let tree = osa::run_applescript_timeout(&script, poll_budget).unwrap_or_default(); if tree.contains(contains) { return Ok(ToolOutput::new(format!("matched '{contains}' in {app}"))); } if Instant::now() >= deadline { bail!("wait_for timed out after {timeout_ms}ms (no '{contains}' in {app})"); } - sleep(Duration::from_millis(250)); + sleep(Duration::from_millis(200)); } } From c84ad1762bf5a71bb506254d12c83308e27331f6 Mon Sep 17 00:00:00 2001 From: jeremy Date: Mon, 8 Jun 2026 21:55:05 -0700 Subject: [PATCH 4/4] fix(computer): wait_for depth 8 + include description text for better matching --- crates/jcode-app-core/src/tool/computer/sys.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/jcode-app-core/src/tool/computer/sys.rs b/crates/jcode-app-core/src/tool/computer/sys.rs index 4de8ce225..ee56c6504 100644 --- a/crates/jcode-app-core/src/tool/computer/sys.rs +++ b/crates/jcode-app-core/src/tool/computer/sys.rs @@ -69,9 +69,8 @@ pub fn notify(text: &str, title: Option<&str>) -> Result { pub fn wait_for(app: &str, contains: &str, timeout_ms: u64) -> Result { let total = Duration::from_millis(timeout_ms.min(60_000)); let deadline = Instant::now() + total; - // Keep each poll shallow so it returns quickly even on big apps; a deep - // (depth-10) dump of a large tree can itself take many seconds, which would - // blow past `timeout_ms`. Depth 6 captures visible labels/values cheaply. + // Keep each poll bounded by the per-poll timeout below so it returns even on + // big apps. Depth 8 captures most visible labels/values while staying cheap. let script = format!( r#" using terms from application "System Events" @@ -84,6 +83,9 @@ using terms from application "System Events" try set out to out & (value of el as text) & " " end try + try + set out to out & (description of el as text) & " " + end try try repeat with child in (UI elements of el) set out to out & (my dumpEl(child, lvl + 1, maxlvl)) @@ -95,7 +97,7 @@ end using terms from tell application "System Events" set frontApp to first application process whose name is {app} try - return my dumpEl(front window of frontApp, 0, 6) + return my dumpEl(front window of frontApp, 0, 8) on error return "" end try