Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 34 additions & 20 deletions crates/jcode-app-core/src/tool/computer/ax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,48 +289,62 @@ pub fn select_menu(app: &str, path: &[String]) -> Result<ToolOutput> {

/// Return the element at a screen point (role/title), useful to confirm targets.
pub fn element_at(app: &str, x: f64, y: f64) -> Result<ToolOutput> {
// System Events can hit-test via "UI element N" is awkward; use AX position
// matching by walking and finding the deepest element containing the point.
// Hit-test by walking the AX tree, but PRUNE: only descend into a subtree
// whose own frame contains the point. A child's frame is contained in its
// parent's, so a parent that doesn't contain the point can't have a matching
// descendant. This turns a full-tree walk (which times out on huge apps like
// System Settings) into a path-length walk. Also bound the depth defensively.
let script = format!(
r#"
using terms from application "System Events"
on hit(el, px, py, idxPath, best)
set result to best
on inFrame(el, px, py)
try
set p to position of el
set sz to size of el
set x1 to item 1 of p
set y1 to item 2 of p
set x2 to x1 + (item 1 of sz)
set y2 to y1 + (item 2 of sz)
if px >= x1 and px <= x2 and py >= y1 and py <= y2 then
set r to ""
try
set r to (role of el as text)
end try
set t to ""
try
set t to (title of el as text)
end try
set result to idxPath & " " & r & " \"" & t & "\" @(" & x1 & "," & y1 & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")"
if px >= x1 and px <= (x1 + (item 1 of sz)) and py >= y1 and py <= (y1 + (item 2 of sz)) then
return true
end if
end try
return false
end inFrame
on hit(el, px, py, idxPath, maxlvl, lvl, best)
set bestHit to best
if lvl > maxlvl then return bestHit
-- Record this element if it contains the point (deepest wins).
if my inFrame(el, px, py) then
set r to ""
try
set r to (role of el as text)
end try
set t to ""
try
set t to (title of el as text)
end try
set p to position of el
set sz to size of el
set bestHit to idxPath & " " & r & " \"" & t & "\" @(" & (item 1 of p) & "," & (item 2 of p) & " " & (item 1 of sz) & "x" & (item 2 of sz) & ")"
end if
-- Only descend into children that themselves contain the point.
try
set i to 0
repeat with child in (UI elements of el)
set i to i + 1
set result to my hit(child, px, py, idxPath & "." & i, result)
if my inFrame(child, px, py) then
set bestHit to my hit(child, px, py, idxPath & "." & i, maxlvl, lvl + 1, bestHit)
end if
end repeat
end try
return result
return bestHit
end hit
end using terms from

tell application "System Events"
set frontApp to first application process whose name is {app}
try
set win to front window of frontApp
set out to my hit(win, {x}, {y}, "", "(none)")
set out to my hit(win, {x}, {y}, "", 40, 0, "(none)")
on error errMsg
set out to "(error: " & errMsg & ")"
end try
Expand All @@ -341,7 +355,7 @@ end tell
x = x,
y = y
);
let res = osa::run_applescript(&script)?;
let res = osa::run_applescript_timeout(&script, Duration::from_secs(12))?;
Ok(ToolOutput::new(format!("Deepest element at ({x:.0},{y:.0}) in {app}:\n{res}"))
.with_title("element_at")
.with_metadata(json!({"app": app, "x": x, "y": y})))
Expand Down
15 changes: 14 additions & 1 deletion crates/jcode-app-core/src/tool/computer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,20 @@ impl Tool for ComputerTool {
},
"script": { "type": "string", "description": "AppleScript (run_applescript) or JS (run_jxa) source." },
"depth": { "type": "integer", "description": "Max AX tree depth for ui/find_element (default 12)." },
"dry_run": { "type": "boolean", "description": "For mutating actions: report the intended action without performing it." }
"to_x": { "type": "number" },
"to_y": { "type": "number" },
"dx": { "type": "integer" },
"dy": { "type": "integer" },
"w": { "type": "number" },
"h": { "type": "number" },
"window_id": { "type": "integer", "description": "window_screenshot id (from list_windows)." },
"menu_path": { "type": "array", "items": { "type": "string" }, "description": "select_menu path, e.g. [\"File\",\"Save\"]." },
"ax_action": { "type": "string", "description": "perform_action AX action, e.g. AXShowMenu." },
"contains": { "type": "string", "description": "wait_for: substring to await." },
"timeout_ms": { "type": "integer" },
"region": { "type": "array", "items": { "type": "number" }, "description": "ocr region [x,y,w,h]; omit for full screen." },
"level": { "type": "number", "description": "set_brightness 0..1." },
"dry_run": { "type": "boolean", "description": "Mutating actions: report intended action without doing it." }
}
})
}
Expand Down
77 changes: 71 additions & 6 deletions crates/jcode-app-core/src/tool/computer/osa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,30 +41,50 @@ fn run(args: &[&str], lang: &str, timeout: Duration) -> Result<String> {
return Ok(stdout.trim_end().to_string());
}

let trimmed = stderr.trim();
bail!("{}", classify_osa_error(stderr.trim(), lang));
}

/// Map an osascript stderr message to an actionable error string. Pure so it can
/// be unit-tested without shelling out. Ordering matters: permission errors are
/// the most actionable and have distinctive text, so they are checked before the
/// generic "reference does not resolve" index errors.
fn classify_osa_error(trimmed: &str, lang: &str) -> String {
let lower = trimmed.to_lowercase();

// Permission errors first. The real "not allowed assistive access" denial
// always carries that phrase; -25211 is errAXAPIDisabled.
if lower.contains("assistive")
|| lower.contains("not allowed")
|| lower.contains("-1719")
|| lower.contains("-25211")
|| lower.contains("1002")
{
bail!(
return format!(
"Accessibility permission required. Run the `setup` action, or grant it in \
System Settings > Privacy & Security > Accessibility for your terminal/jcode. \
({trimmed})"
);
}
if lower.contains("-1743") || lower.contains("not authorized to send apple events") {
bail!(
return format!(
"Automation permission required for the target app. Approve the prompt, or grant it \
in System Settings > Privacy & Security > Automation. ({trimmed})"
);
}
// -1719 (errAEIllegalIndex / "Invalid index") and -1728 (errAENoSuchObject /
// "Can't get ...") mean the target reference does not resolve: the app isn't
// running, has no front window, or an AX path index is out of range. This is
// NOT a permission problem, so report it accurately instead of sending the
// user to grant Accessibility (the previous behavior, which was misleading).
if lower.contains("-1719") || lower.contains("-1728") || lower.contains("invalid index") {
return format!(
"target not found: the app may not be running, has no front window, or an AX path \
index is out of range. Check `list_apps`/`ui` and retry. ({trimmed})"
);
}
if trimmed.is_empty() {
bail!("{lang} failed (no error output)");
return format!("{lang} failed (no error output)");
}
bail!("{lang} failed: {trimmed}");
format!("{lang} failed: {trimmed}")
}

/// Run a command with a wall-clock timeout. Returns (success, stdout, stderr).
Expand Down Expand Up @@ -154,4 +174,49 @@ mod tests {
.to_string();
assert!(err.contains("timed out"), "got: {err}");
}

#[test]
fn classifies_permission_error() {
let msg = classify_osa_error(
"execution error: System Events got an error: osascript is not allowed assistive access. (-25211)",
"AppleScript",
);
assert!(msg.contains("Accessibility permission required"), "got: {msg}");
}

#[test]
fn classifies_invalid_index_as_not_found_not_permission() {
// Regression: -1719 used to be misreported as "Accessibility permission
// required" even though it means the target reference didn't resolve.
let msg = classify_osa_error(
"execution error: System Events got an error: Can\u{2019}t get application process 1 whose name = \"Codex\". Invalid index. (-1719)",
"AppleScript",
);
assert!(msg.contains("target not found"), "got: {msg}");
assert!(!msg.contains("Accessibility permission"), "got: {msg}");
}

#[test]
fn classifies_no_such_object_as_not_found() {
let msg = classify_osa_error(
"execution error: System Events got an error: Can\u{2019}t get front window of process \"Foo\". (-1728)",
"AppleScript",
);
assert!(msg.contains("target not found"), "got: {msg}");
}

#[test]
fn classifies_automation_error() {
let msg = classify_osa_error(
"execution error: Not authorized to send Apple events to Finder. (-1743)",
"AppleScript",
);
assert!(msg.contains("Automation permission required"), "got: {msg}");
}

#[test]
fn classifies_generic_error() {
let msg = classify_osa_error("execution error: something weird (-2700)", "JXA");
assert!(msg.contains("JXA failed"), "got: {msg}");
}
}
21 changes: 17 additions & 4 deletions crates/jcode-app-core/src/tool/computer/sys.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ pub fn notify(text: &str, title: Option<&str>) -> Result<ToolOutput> {
/// Poll an app's AX tree until a substring appears (element_appears) or a
/// timeout elapses. Cheap structural wait instead of fixed sleeps.
pub fn wait_for(app: &str, contains: &str, timeout_ms: u64) -> Result<ToolOutput> {
let deadline = Instant::now() + Duration::from_millis(timeout_ms.min(60_000));
let total = Duration::from_millis(timeout_ms.min(60_000));
let deadline = Instant::now() + total;
// Keep each poll bounded by the per-poll timeout below so it returns even on
// big apps. Depth 8 captures most visible labels/values while staying cheap.
let script = format!(
r#"
using terms from application "System Events"
Expand All @@ -80,6 +83,9 @@ using terms from application "System Events"
try
set out to out & (value of el as text) & " "
end try
try
set out to out & (description of el as text) & " "
end try
try
repeat with child in (UI elements of el)
set out to out & (my dumpEl(child, lvl + 1, maxlvl))
Expand All @@ -91,7 +97,7 @@ end using terms from
tell application "System Events"
set frontApp to first application process whose name is {app}
try
return my dumpEl(front window of frontApp, 0, 10)
return my dumpEl(front window of frontApp, 0, 8)
on error
return ""
end try
Expand All @@ -100,14 +106,21 @@ end tell
app = osa::as_quote(app)
);
loop {
let tree = osa::run_applescript(&script).unwrap_or_default();
// Bound each poll by the time left so the whole call honors timeout_ms,
// and never let a single poll exceed ~3s.
let remaining = deadline.saturating_duration_since(Instant::now());
if remaining.is_zero() {
bail!("wait_for timed out after {timeout_ms}ms (no '{contains}' in {app})");
}
let poll_budget = remaining.min(Duration::from_secs(3));
let tree = osa::run_applescript_timeout(&script, poll_budget).unwrap_or_default();
if tree.contains(contains) {
return Ok(ToolOutput::new(format!("matched '{contains}' in {app}")));
}
if Instant::now() >= deadline {
bail!("wait_for timed out after {timeout_ms}ms (no '{contains}' in {app})");
}
sleep(Duration::from_millis(250));
sleep(Duration::from_millis(200));
}
}

Expand Down
25 changes: 22 additions & 3 deletions crates/jcode-app-core/src/tool/computer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,35 @@ fn is_mutating_classifies() {
assert!(!super::is_mutating("discover"));
}

#[test]
fn schema_declares_every_input_field() {
// Regression guard for the "schema omits half its fields" bug: every field
// `dispatch` can require must be declared in `parameters_schema()`, or the
// model can never send it and the action is dead on arrival.
let tool = ComputerTool::new();
let schema = tool.parameters_schema();
let props = schema["properties"].as_object().expect("properties object");
for field in [
"action", "category", "x", "y", "to_x", "to_y", "w", "h", "text", "keys", "dx", "dy",
"depth", "app", "role", "title", "value", "element", "ax_action", "menu_path",
"window_id", "script", "contains", "timeout_ms", "region", "level", "dry_run",
] {
assert!(props.contains_key(field), "schema is missing field `{field}`");
}
}

#[test]
fn schema_is_compact() {
// Guard against context bloat: the always-on schema + description must stay
// small. Measured well under this bound; alert if it balloons.
// small. Action *specs* live in `discover` (progressive disclosure), but
// every input *field* must be declared here or the model can't send it, so
// the field set (not the action set) sets the floor. Keep always-on cost
// roughly under ~900 tokens; alert if it balloons past that.
let tool = ComputerTool::new();
let schema = serde_json::to_string(&tool.parameters_schema()).unwrap();
let total = tool.description().len() + schema.len();
// ~4 chars/token; keep always-on cost roughly under ~700 tokens.
assert!(
total < 2800,
total < 3500,
"computer tool always-on size grew to {total} chars (~{} tokens)",
total / 4
);
Expand Down
Loading