Skip to content

Commit 2df7df4

Browse files
Merge pull request #262 from askui/fix/mouve_mouse_tool
fix: handle comma-separated coordinates in move_mouse tool
2 parents d272558 + 333a4ce commit 2df7df4

1 file changed

Lines changed: 27 additions & 7 deletions

File tree

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import re
2+
13
from askui.models.shared import ComputerBaseTool, ToolTags
24
from askui.tools.computer_agent_os_facade import ComputerAgentOsFacade
35

@@ -8,17 +10,20 @@ class ComputerMoveMouseTool(ComputerBaseTool):
810
def __init__(self, agent_os: ComputerAgentOsFacade | None = None) -> None:
911
super().__init__(
1012
name="move_mouse",
11-
description="Move the mouse to a specific position.",
13+
description="""Move the mouse to a specific position.
14+
Pass x and y as separate integer values, not as a combined string.""",
1215
input_schema={
1316
"type": "object",
1417
"properties": {
1518
"x": {
1619
"type": "integer",
17-
"description": "The x coordinate of the mouse position as int.",
20+
"description": """The x (horizontal) pixel coordinate.
21+
Must be a single integer, e.g. 330.""",
1822
},
1923
"y": {
2024
"type": "integer",
21-
"description": "The y coordinate of the mouse position as int.",
25+
"description": """The y (vertical) pixel coordinate.
26+
Must be a single integer, e.g. 182.""",
2227
},
2328
},
2429
"required": ["x", "y"],
@@ -29,9 +34,24 @@ def __init__(self, agent_os: ComputerAgentOsFacade | None = None) -> None:
2934
self.is_cacheable = True
3035

3136
def __call__(self, x: int, y: int) -> str:
    """Move the mouse pointer to ``(x, y)`` and report where it ended up.

    Args:
        x: Horizontal coordinate; expected to be an ``int``.
        y: Vertical coordinate; expected to be an ``int``.

    Returns:
        A confirmation message containing the coordinates that were used.

    Raises:
        ValueError: Propagated from ``_parse_coordinates`` when exactly two
            numbers cannot be recovered from malformed input.
    """
    # The schema asks for two integers, but the agent does not always comply.
    # Observed malformed calls:
    #   * numeric strings instead of ints      -> x="330", y="182"
    #   * both values crammed into ``x``        -> x="330, 182" or x="330, "
    # Anything that is not already a pair of ints is therefore re-parsed by
    # pulling the numbers out of the string form of both arguments.
    if not (isinstance(x, int) and isinstance(y, int)):
        x, y = self._parse_coordinates(x, y)  # type: ignore[unreachable]

    self.agent_os.mouse_move(x, y)
    confirmation = f"Mouse was moved to position ({x}, {y})."
    return confirmation
46+
47+
@staticmethod
48+
def _parse_coordinates(x: float | str, y: float | str) -> tuple[int, int]:
49+
_NUMBER_PATTERN = re.compile(r"-?\d+")
50+
combined = f"{x},{y}"
51+
numbers = _NUMBER_PATTERN.findall(combined)
52+
if not len(numbers) == 2:
53+
error_msg = f"""Could not parse mouse_move coordinates from provided
54+
parameters x={x}, y={y}. The parameters x and y must be passed as separate
55+
integer values!"""
56+
raise ValueError(error_msg)
57+
return int(numbers[0]), int(numbers[1])

0 commit comments

Comments
 (0)