-
Notifications
You must be signed in to change notification settings - Fork 110
Expand file tree
/
Copy pathschemas.py
More file actions
297 lines (238 loc) · 11.1 KB
/
schemas.py
File metadata and controls
297 lines (238 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
import copy
from enum import Enum
from typing import Any, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, field_serializer
# Fallback JSON schema used when an extract call supplies no schema of its
# own: a single required string property named "extraction". Kept in sync
# with the default used by the TypeScript version.
DEFAULT_EXTRACT_SCHEMA = {
    "type": "object",
    "properties": {
        "extraction": {
            "type": "string",
        },
    },
    "required": [
        "extraction",
    ],
}
# TODO: Remove this
class AvailableModel(str, Enum):
    """String-valued enumeration of known model identifiers.

    Members are also ``str`` instances, so each compares equal to its
    plain string value.
    """

    # OpenAI models
    GPT_4O = "gpt-4o"
    GPT_4O_MINI = "gpt-4o-mini"
    GPT_4_1_MINI = "gpt-4.1-mini"

    # Anthropic models
    CLAUDE_3_5_SONNET_LATEST = "claude-3-5-sonnet-latest"
    CLAUDE_3_7_SONNET_LATEST = "claude-3-7-sonnet-latest"

    COMPUTER_USE_PREVIEW = "computer-use-preview"
    GEMINI_2_0_FLASH = "gemini-2.0-flash"

    # Tongyi Qianwen (Qwen) models
    QWEN_TURBO = "qwen-turbo"
    QWEN_PLUS = "qwen-plus"
    QWEN_MAX = "qwen-max"
def _snake_to_camel(field_name: str) -> str:
    """Convert a snake_case field name to camelCase.

    The first underscore-separated segment is kept verbatim; every later
    segment goes through ``str.capitalize`` (first character upper-cased,
    the rest lower-cased), e.g. ``dom_settle_timeout_ms`` becomes
    ``domSettleTimeoutMs``.
    """
    head, *tail = field_name.split("_")
    return head + "".join(segment.capitalize() for segment in tail)


class StagehandBaseModel(BaseModel):
    """Base model for all Stagehand models with camelCase conversion support"""

    model_config = ConfigDict(
        # Accept the Python (snake_case) field name as well as the generated
        # camelCase alias when populating a model.
        populate_by_name=True,
        # Every field gets a camelCase alias derived from its snake_case name.
        alias_generator=_snake_to_camel,
    )
class ActOptions(StagehandBaseModel):
    """
    Options for the 'act' command.

    Only ``action`` is required; every other field defaults to ``None``.

    Attributes:
        action (str): The action command to be executed by the AI.
        variables (Optional[dict[str, str]]): Key-value pairs for variable substitution.
        model_name (Optional[str]): The model to use for processing.
        slow_dom_based_act (Optional[bool]): Whether to use DOM-based action execution.
        dom_settle_timeout_ms (Optional[int]): Additional time for DOM to settle after an action.
        timeout_ms (Optional[int]): Timeout for the action in milliseconds.
        model_client_options (Optional[dict[str, Any]]): Free-form options; presumably
            forwarded to the model client (not documented in this file; confirm).
        iframes (Optional[bool]): Not documented in this file; presumably toggles
            iframe handling (confirm against the consumer of these options).
    """

    action: str = Field(
        ...,
        description="The action command to be executed by the AI.",
    )

    # Optional tuning knobs; None means "not specified".
    variables: Optional[dict[str, str]] = None
    model_name: Optional[str] = None
    slow_dom_based_act: Optional[bool] = None
    dom_settle_timeout_ms: Optional[int] = None
    timeout_ms: Optional[int] = None
    model_client_options: Optional[dict[str, Any]] = None
    iframes: Optional[bool] = None
class ActResult(StagehandBaseModel):
    """
    Result of the 'act' command.

    All three fields are required.

    Attributes:
        success (bool): Whether the action was successful.
        message (str): Message from the AI about the action.
        action (str): The action command that was executed.
    """

    success: bool = Field(
        ...,
        description="Whether the action was successful.",
    )
    message: str = Field(
        ...,
        description="Message from the AI about the action.",
    )
    action: str = Field(
        ...,
        description="The action command that was executed.",
    )
class ExtractOptions(StagehandBaseModel):
    """
    Options for the 'extract' command.

    Attributes:
        instruction (str): Instruction specifying what data to extract using AI.
        model_name (Optional[str]): The model to use for processing.
        selector (Optional[str]): CSS selector to limit extraction to.
        schema_definition (Union[dict[str, Any], type[BaseModel]]): A JSON schema or Pydantic model that defines the structure of the expected data.
            Note: If passing a Pydantic model, invoke its .model_json_schema() method to ensure the schema is JSON serializable.
        use_text_extract (Optional[bool]): Whether to use text-based extraction.
        dom_settle_timeout_ms (Optional[int]): Additional time for DOM to settle before extraction.
        model_client_options (Optional[dict[Any, Any]]): Free-form options; presumably
            forwarded to the model client (not documented in this file; confirm).
        iframes (Optional[bool]): Not documented in this file; presumably toggles
            iframe handling (confirm against the consumer of these options).
    """

    # Pydantic merges this with the configuration inherited from
    # StagehandBaseModel, so the camelCase aliases still apply.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    instruction: str = Field(
        ..., description="Instruction specifying what data to extract using AI."
    )
    model_name: Optional[str] = None
    selector: Optional[str] = None
    # IMPORTANT: If using a Pydantic model for schema_definition, please call its .model_json_schema() method
    # to convert it to a JSON serializable dictionary before sending it with the extract command.
    schema_definition: Union[dict[str, Any], type[BaseModel]] = Field(
        default=DEFAULT_EXTRACT_SCHEMA,
        description="A JSON schema or Pydantic model that defines the structure of the expected data.",
    )
    use_text_extract: Optional[bool] = None
    dom_settle_timeout_ms: Optional[int] = None
    model_client_options: Optional[dict[Any, Any]] = None
    iframes: Optional[bool] = None

    @field_serializer("schema_definition")
    def serialize_schema_definition(
        self, schema_definition: Union[dict[str, Any], type[BaseModel]]
    ) -> dict[str, Any]:
        """Serialize schema_definition to a JSON schema if it's a Pydantic model.

        A Pydantic model class is converted with ``model_json_schema()`` and
        its local ``$ref`` pointers are inlined so the result is
        self-contained. The ``$defs`` / ``definitions`` section is then
        dropped, unless the model is recursive: references that point back
        into a definition already being expanded cannot be inlined, so they
        are left as ``$ref`` and the definitions section is kept for them.

        Args:
            schema_definition: A JSON-schema dict (returned unchanged) or a
                Pydantic model class.

        Returns:
            The JSON schema as a dict.

        Raises:
            TypeError: If the value is neither a dict nor a BaseModel subclass.
        """
        if isinstance(schema_definition, type) and issubclass(
            schema_definition, BaseModel
        ):
            # Get the JSON schema using default ref_template ('#/$defs/{model}')
            schema = schema_definition.model_json_schema()

            defs_key = "$defs"
            if defs_key not in schema:
                defs_key = "definitions"
                if defs_key not in schema:
                    # Nothing to inline.
                    return schema

            ref_prefix = f"#/{defs_key}/"
            # Detach the definitions before walking the schema so that the
            # walk never rewrites (or descends into) the definitions themselves.
            definitions = schema.pop(defs_key)
            if definitions:
                self._resolve_references(schema, definitions, ref_prefix)
                # Recursive models leave back-references behind; those are
                # only meaningful if the definitions they point to survive.
                if self._has_unresolved_reference(schema, definitions, ref_prefix):
                    schema[defs_key] = definitions
            return schema

        elif isinstance(schema_definition, dict):
            return schema_definition

        raise TypeError("schema_definition must be a Pydantic model or a dict")

    def _resolve_references(
        self,
        obj: Any,
        definitions: dict,
        ref_prefix: str,
        _expanding: frozenset = frozenset(),
    ) -> None:
        """Recursively resolve $ref references in a schema using definitions.

        ``obj`` is modified in place. ``definitions`` is never modified:
        each referenced definition is deep-copied before being inlined.

        Args:
            obj: Schema fragment (dict, list or scalar) to process.
            definitions: Mapping of definition name to schema.
            ref_prefix: Prefix identifying local references, e.g. ``"#/$defs/"``.
            _expanding: Internal. Names of the definitions currently being
                inlined on the path to ``obj``; a reference to any of them
                is a cycle and is left untouched to guarantee termination.
        """
        if isinstance(obj, dict):
            ref = obj.get("$ref")
            if isinstance(ref, str) and ref.startswith(ref_prefix):
                ref_name = ref[len(ref_prefix) :]  # Get name after prefix
                if ref_name in definitions and ref_name not in _expanding:
                    # Keys that sit next to "$ref" override the definition.
                    sibling_keys = {k: v for k, v in obj.items() if k != "$ref"}
                    # Deep copy: a shallow copy would share nested dicts with
                    # `definitions` and with other inlined copies.
                    resolved_definition = copy.deepcopy(definitions[ref_name])
                    self._resolve_references(
                        resolved_definition,
                        definitions,
                        ref_prefix,
                        _expanding | {ref_name},
                    )
                    obj.clear()
                    obj.update(resolved_definition)
                    obj.update(sibling_keys)
            else:
                # Recursively process all values in the dictionary
                for value in obj.values():
                    self._resolve_references(
                        value, definitions, ref_prefix, _expanding
                    )
        elif isinstance(obj, list):
            # Process all items in the list
            for item in obj:
                self._resolve_references(item, definitions, ref_prefix, _expanding)

    def _has_unresolved_reference(
        self, obj: Any, definitions: dict, ref_prefix: str
    ) -> bool:
        """Return True if ``obj`` still contains a ``$ref`` to a known definition."""
        if isinstance(obj, dict):
            ref = obj.get("$ref")
            if (
                isinstance(ref, str)
                and ref.startswith(ref_prefix)
                and ref[len(ref_prefix) :] in definitions
            ):
                return True
            return any(
                self._has_unresolved_reference(value, definitions, ref_prefix)
                for value in obj.values()
            )
        if isinstance(obj, list):
            return any(
                self._has_unresolved_reference(item, definitions, ref_prefix)
                for item in obj
            )
        return False
class ExtractResult(StagehandBaseModel):
    """
    Result of the 'extract' command.

    A generic container for extraction results. It declares no fields of
    its own; whatever fields the extraction produces (as dictated by the
    schema given in ExtractOptions) are accepted as extra attributes.
    """

    # No declared fields on purpose: extra="allow" lets the model accept
    # arbitrary keys from the extraction result.
    model_config = ConfigDict(extra="allow")

    def __getitem__(self, key):
        """
        Enable dictionary-style access to attributes.

        This allows usage like result["extraction"] in addition to
        result.extraction. The lookup is delegated to ``getattr``, so a
        missing key raises AttributeError rather than KeyError.
        """
        value = getattr(self, key)
        return value
class ObserveOptions(StagehandBaseModel):
    """
    Options for the 'observe' command.

    Only ``instruction`` is required; every other field defaults to ``None``.

    Attributes:
        instruction (str): Instruction detailing what the AI should observe.
        model_name (Optional[str]): The model to use for processing.
        draw_overlay (Optional[bool]): Whether to draw an overlay on observed elements.
        dom_settle_timeout_ms (Optional[int]): Additional time for DOM to settle before observation.
        model_client_options (Optional[dict[str, Any]]): Free-form options; presumably
            forwarded to the model client (not documented in this file; confirm).
        iframes (Optional[bool]): Not documented in this file; presumably toggles
            iframe handling (confirm against the consumer of these options).
    """

    instruction: str = Field(
        ...,
        description="Instruction detailing what the AI should observe.",
    )

    # Optional tuning knobs; None means "not specified".
    model_name: Optional[str] = None
    draw_overlay: Optional[bool] = None
    dom_settle_timeout_ms: Optional[int] = None
    model_client_options: Optional[dict[str, Any]] = None
    iframes: Optional[bool] = None
class ObserveResult(StagehandBaseModel):
    """
    Result of the 'observe' command.

    ``selector`` and ``description`` are required; the remaining fields
    default to ``None``.

    Attributes:
        selector (str): The selector of the observed element.
        description (str): The description of the observed element.
        backend_node_id (Optional[int]): The backend node ID.
        method (Optional[str]): The method to execute.
        arguments (Optional[list[str]]): The arguments for the method.
    """

    selector: str = Field(
        ...,
        description="The selector of the observed element.",
    )
    description: str = Field(
        ...,
        description="The description of the observed element.",
    )
    backend_node_id: Optional[int] = None
    method: Optional[str] = None
    arguments: Optional[list[str]] = None

    def __getitem__(self, key):
        """
        Enable dictionary-style access to attributes.

        This allows usage like result["selector"] in addition to
        result.selector. The lookup is delegated to ``getattr``, so a
        missing key raises AttributeError rather than KeyError.
        """
        value = getattr(self, key)
        return value
class AgentProvider(str, Enum):
    """Supported agent providers.

    Members are also ``str`` instances, so each compares equal to its
    plain string value.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
class AgentConfig(StagehandBaseModel):
    """
    Configuration for agent execution.

    Every field is optional and defaults to ``None``.

    Attributes:
        provider (Optional[AgentProvider]): The provider to use (openai, anthropic or google).
        model (Optional[str]): The model name to use.
        instructions (Optional[str]): Custom instructions for the agent.
        options (Optional[dict[str, Any]]): Additional provider-specific options.
    """

    # Which backend and which model
    provider: Optional[AgentProvider] = None
    model: Optional[str] = None

    # Behavioural customisation
    instructions: Optional[str] = None
    options: Optional[dict[str, Any]] = None
class AgentExecuteOptions(StagehandBaseModel):
    """
    Options for agent execution.

    Only ``instruction`` is required; every other field defaults to ``None``.

    Attributes:
        instruction (str): The task instruction for the agent.
        max_steps (Optional[int]): Maximum number of steps the agent can take.
        auto_screenshot (Optional[bool]): Whether to automatically take screenshots between steps.
        wait_between_actions (Optional[int]): Milliseconds to wait between actions.
        context (Optional[str]): Additional context for the agent.
    """

    instruction: str = Field(
        ...,
        description="The task instruction for the agent.",
    )

    # Optional execution limits and hints; None means "not specified".
    max_steps: Optional[int] = None
    auto_screenshot: Optional[bool] = None
    wait_between_actions: Optional[int] = None
    context: Optional[str] = None
class AgentExecuteResult(StagehandBaseModel):
    """
    Result of agent execution.

    ``success`` is required; ``actions`` and ``message`` default to ``None``
    and ``completed`` defaults to ``False``.

    Attributes:
        success (bool): Whether the execution was successful.
        actions (Optional[list[dict[str, Any]]]): Actions taken by the agent.
        message (Optional[str]): Final result message from the agent.
        completed (bool): Whether the agent has completed its task.
    """

    success: bool = Field(
        ...,
        description="Whether the execution was successful.",
    )
    actions: Optional[list[dict[str, Any]]] = None
    message: Optional[str] = None
    completed: bool = Field(
        default=False,
        description="Whether the agent has completed its task.",
    )