diff --git a/crates/jcode-app-core/src/tool/computer/ax.rs b/crates/jcode-app-core/src/tool/computer/ax.rs index d1ae8c2e1..653c42422 100644 --- a/crates/jcode-app-core/src/tool/computer/ax.rs +++ b/crates/jcode-app-core/src/tool/computer/ax.rs @@ -289,40 +289,54 @@ pub fn select_menu(app: &str, path: &[String]) -> Result { /// Return the element at a screen point (role/title), useful to confirm targets. pub fn element_at(app: &str, x: f64, y: f64) -> Result { - // System Events can hit-test via "UI element N" is awkward; use AX position - // matching by walking and finding the deepest element containing the point. + // Hit-test by walking the AX tree, but PRUNE: only descend into a subtree + // whose own frame contains the point. A child's frame is contained in its + // parent's, so a parent that doesn't contain the point can't have a matching + // descendant. This turns a full-tree walk (which times out on huge apps like + // System Settings) into a path-length walk. Also bound the depth defensively. let script = format!( r#" using terms from application "System Events" - on hit(el, px, py, idxPath, best) - set result to best + on inFrame(el, px, py) try set p to position of el set sz to size of el set x1 to item 1 of p set y1 to item 2 of p - set x2 to x1 + (item 1 of sz) - set y2 to y1 + (item 2 of sz) - if px >= x1 and px <= x2 and py >= y1 and py <= y2 then - set r to "" - try - set r to (role of el as text) - end try - set t to "" - try - set t to (title of el as text) - end try - set result to idxPath & " " & r & " \"" & t & "\" @(" & x1 & "," & y1 & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")" + if px >= x1 and px <= (x1 + (item 1 of sz)) and py >= y1 and py <= (y1 + (item 2 of sz)) then + return true end if end try + return false + end inFrame + on hit(el, px, py, idxPath, maxlvl, lvl, best) + set bestHit to best + if lvl > maxlvl then return bestHit + -- Record this element if it contains the point (deepest wins). + if my inFrame(el, px, py) then + set r to "" + try + set r to (role of el as text) + end try + set t to "" + try + set t to (title of el as text) + end try + set p to position of el + set sz to size of el + set bestHit to idxPath & " " & r & " \"" & t & "\" @(" & (item 1 of p) & "," & (item 2 of p) & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")" + end if + -- Only descend into children that themselves contain the point. try set i to 0 repeat with child in (UI elements of el) set i to i + 1 - set result to my hit(child, px, py, idxPath & "." & i, result) + if my inFrame(child, px, py) then + set bestHit to my hit(child, px, py, idxPath & "." & i, maxlvl, lvl + 1, bestHit) + end if end repeat end try - return result + return bestHit end hit end using terms from @@ -330,7 +344,7 @@ tell application "System Events" set frontApp to first application process whose name is {app} try set win to front window of frontApp - set out to my hit(win, {x}, {y}, "", "(none)") + set out to my hit(win, {x}, {y}, "", 40, 0, "(none)") on error errMsg set out to "(error: " & errMsg & ")" end try @@ -341,7 +355,7 @@ end tell x = x, y = y ); - let res = osa::run_applescript(&script)?; + let res = osa::run_applescript_timeout(&script, Duration::from_secs(12))?; Ok(ToolOutput::new(format!("Deepest element at ({x:.0},{y:.0}) in {app}:\n{res}")) .with_title("element_at") .with_metadata(json!({"app": app, "x": x, "y": y}))) diff --git a/crates/jcode-app-core/src/tool/computer/mod.rs b/crates/jcode-app-core/src/tool/computer/mod.rs index 4557a89e0..5392d340a 100644 --- a/crates/jcode-app-core/src/tool/computer/mod.rs +++ b/crates/jcode-app-core/src/tool/computer/mod.rs @@ -226,7 +226,20 @@ impl Tool for ComputerTool { }, "script": { "type": "string", "description": "AppleScript (run_applescript) or JS (run_jxa) source." }, "depth": { "type": "integer", "description": "Max AX tree depth for ui/find_element (default 12)." }, - "dry_run": { "type": "boolean", "description": "For mutating actions: report the intended action without performing it." } + "to_x": { "type": "number" }, + "to_y": { "type": "number" }, + "dx": { "type": "integer" }, + "dy": { "type": "integer" }, + "w": { "type": "number" }, + "h": { "type": "number" }, + "window_id": { "type": "integer", "description": "window_screenshot id (from list_windows)." }, + "menu_path": { "type": "array", "items": { "type": "string" }, "description": "select_menu path, e.g. [\"File\",\"Save\"]." }, + "ax_action": { "type": "string", "description": "perform_action AX action, e.g. AXShowMenu." }, + "contains": { "type": "string", "description": "wait_for: substring to await." }, + "timeout_ms": { "type": "integer" }, + "region": { "type": "array", "items": { "type": "number" }, "description": "ocr region [x,y,w,h]; omit for full screen." }, + "level": { "type": "number", "description": "set_brightness 0..1." }, + "dry_run": { "type": "boolean", "description": "Mutating actions: report intended action without doing it." } } }) } diff --git a/crates/jcode-app-core/src/tool/computer/osa.rs b/crates/jcode-app-core/src/tool/computer/osa.rs index 372876a88..66e7b855e 100644 --- a/crates/jcode-app-core/src/tool/computer/osa.rs +++ b/crates/jcode-app-core/src/tool/computer/osa.rs @@ -41,30 +41,50 @@ fn run(args: &[&str], lang: &str, timeout: Duration) -> Result { return Ok(stdout.trim_end().to_string()); } - let trimmed = stderr.trim(); + bail!("{}", classify_osa_error(stderr.trim(), lang)); +} + +/// Map an osascript stderr message to an actionable error string. Pure so it can +/// be unit-tested without shelling out. Ordering matters: permission errors are +/// the most actionable and have distinctive text, so they are checked before the +/// generic "reference does not resolve" index errors. +fn classify_osa_error(trimmed: &str, lang: &str) -> String { let lower = trimmed.to_lowercase(); + // Permission errors first. The real "not allowed assistive access" denial + // always carries that phrase; -25211 is errAXAPIDisabled. if lower.contains("assistive") || lower.contains("not allowed") - || lower.contains("-1719") + || lower.contains("-25211") || lower.contains("1002") { - bail!( + return format!( "Accessibility permission required. Run the `setup` action, or grant it in \ System Settings > Privacy & Security > Accessibility for your terminal/jcode. \ ({trimmed})" ); } if lower.contains("-1743") || lower.contains("not authorized to send apple events") { - bail!( + return format!( "Automation permission required for the target app. Approve the prompt, or grant it \ in System Settings > Privacy & Security > Automation. ({trimmed})" ); } + // -1719 (errAEIllegalIndex / "Invalid index") and -1728 (errAENoSuchObject / + // "Can't get ...") mean the target reference does not resolve: the app isn't + // running, has no front window, or an AX path index is out of range. This is + // NOT a permission problem, so report it accurately instead of sending the + // user to grant Accessibility (the previous behavior, which was misleading). + if lower.contains("-1719") || lower.contains("-1728") || lower.contains("invalid index") { + return format!( + "target not found: the app may not be running, has no front window, or an AX path \ + index is out of range. Check `list_apps`/`ui` and retry. ({trimmed})" + ); + } if trimmed.is_empty() { - bail!("{lang} failed (no error output)"); + return format!("{lang} failed (no error output)"); } - bail!("{lang} failed: {trimmed}"); + format!("{lang} failed: {trimmed}") } /// Run a command with a wall-clock timeout. Returns (success, stdout, stderr). @@ -154,4 +174,49 @@ mod tests { .to_string(); assert!(err.contains("timed out"), "got: {err}"); } + + #[test] + fn classifies_permission_error() { + let msg = classify_osa_error( + "execution error: System Events got an error: osascript is not allowed assistive access. (-25211)", + "AppleScript", + ); + assert!(msg.contains("Accessibility permission required"), "got: {msg}"); + } + + #[test] + fn classifies_invalid_index_as_not_found_not_permission() { + // Regression: -1719 used to be misreported as "Accessibility permission + // required" even though it means the target reference didn't resolve. + let msg = classify_osa_error( + "execution error: System Events got an error: Can\u{2019}t get application process 1 whose name = \"Codex\". Invalid index. (-1719)", + "AppleScript", + ); + assert!(msg.contains("target not found"), "got: {msg}"); + assert!(!msg.contains("Accessibility permission"), "got: {msg}"); + } + + #[test] + fn classifies_no_such_object_as_not_found() { + let msg = classify_osa_error( + "execution error: System Events got an error: Can\u{2019}t get front window of process \"Foo\". (-1728)", + "AppleScript", + ); + assert!(msg.contains("target not found"), "got: {msg}"); + } + + #[test] + fn classifies_automation_error() { + let msg = classify_osa_error( + "execution error: Not authorized to send Apple events to Finder. (-1743)", + "AppleScript", + ); + assert!(msg.contains("Automation permission required"), "got: {msg}"); + } + + #[test] + fn classifies_generic_error() { + let msg = classify_osa_error("execution error: something weird (-2700)", "JXA"); + assert!(msg.contains("JXA failed"), "got: {msg}"); + } } diff --git a/crates/jcode-app-core/src/tool/computer/sys.rs b/crates/jcode-app-core/src/tool/computer/sys.rs index d1f091b96..ee56c6504 100644 --- a/crates/jcode-app-core/src/tool/computer/sys.rs +++ b/crates/jcode-app-core/src/tool/computer/sys.rs @@ -67,7 +67,10 @@ pub fn notify(text: &str, title: Option<&str>) -> Result { /// Poll an app's AX tree until a substring appears (element_appears) or a /// timeout elapses. Cheap structural wait instead of fixed sleeps. pub fn wait_for(app: &str, contains: &str, timeout_ms: u64) -> Result { - let deadline = Instant::now() + Duration::from_millis(timeout_ms.min(60_000)); + let total = Duration::from_millis(timeout_ms.min(60_000)); + let deadline = Instant::now() + total; + // Keep each poll bounded by the per-poll timeout below so it returns even on + // big apps. Depth 8 captures most visible labels/values while staying cheap. let script = format!( r#" using terms from application "System Events" @@ -80,6 +83,9 @@ using terms from application "System Events" try set out to out & (value of el as text) & " " end try + try + set out to out & (description of el as text) & " " + end try try repeat with child in (UI elements of el) set out to out & (my dumpEl(child, lvl + 1, maxlvl)) @@ -91,7 +97,7 @@ end using terms from tell application "System Events" set frontApp to first application process whose name is {app} try - return my dumpEl(front window of frontApp, 0, 10) + return my dumpEl(front window of frontApp, 0, 8) on error return "" end try @@ -100,14 +106,21 @@ end tell app = osa::as_quote(app) ); loop { - let tree = osa::run_applescript(&script).unwrap_or_default(); + // Bound each poll by the time left so the whole call honors timeout_ms, + // and never let a single poll exceed ~3s. + let remaining = deadline.saturating_duration_since(Instant::now()); + if remaining.is_zero() { + bail!("wait_for timed out after {timeout_ms}ms (no '{contains}' in {app})"); + } + let poll_budget = remaining.min(Duration::from_secs(3)); + let tree = osa::run_applescript_timeout(&script, poll_budget).unwrap_or_default(); if tree.contains(contains) { return Ok(ToolOutput::new(format!("matched '{contains}' in {app}"))); } if Instant::now() >= deadline { bail!("wait_for timed out after {timeout_ms}ms (no '{contains}' in {app})"); } - sleep(Duration::from_millis(250)); + sleep(Duration::from_millis(200)); } } diff --git a/crates/jcode-app-core/src/tool/computer/tests.rs b/crates/jcode-app-core/src/tool/computer/tests.rs index 4241d1dad..3780ea081 100644 --- a/crates/jcode-app-core/src/tool/computer/tests.rs +++ b/crates/jcode-app-core/src/tool/computer/tests.rs @@ -97,16 +97,35 @@ fn is_mutating_classifies() { assert!(!super::is_mutating("discover")); } +#[test] +fn schema_declares_every_input_field() { + // Regression guard for the "schema omits half its fields" bug: every field + // `dispatch` can require must be declared in `parameters_schema()`, or the + // model can never send it and the action is dead on arrival. + let tool = ComputerTool::new(); + let schema = tool.parameters_schema(); + let props = schema["properties"].as_object().expect("properties object"); + for field in [ + "action", "category", "x", "y", "to_x", "to_y", "w", "h", "text", "keys", "dx", "dy", + "depth", "app", "role", "title", "value", "element", "ax_action", "menu_path", + "window_id", "script", "contains", "timeout_ms", "region", "level", "dry_run", + ] { + assert!(props.contains_key(field), "schema is missing field `{field}`"); + } +} + #[test] fn schema_is_compact() { // Guard against context bloat: the always-on schema + description must stay - // small. Measured well under this bound; alert if it balloons. + // small. Action *specs* live in `discover` (progressive disclosure), but + // every input *field* must be declared here or the model can't send it, so + // the field set (not the action set) sets the floor. Keep always-on cost + // roughly under ~900 tokens; alert if it balloons past that. let tool = ComputerTool::new(); let schema = serde_json::to_string(&tool.parameters_schema()).unwrap(); let total = tool.description().len() + schema.len(); - // ~4 chars/token; keep always-on cost roughly under ~700 tokens. assert!( - total < 2800, + total < 3500, "computer tool always-on size grew to {total} chars (~{} tokens)", total / 4 );