Skip to content

Commit 0dde544

Browse files
authored
Merge pull request #245 from ConstantineLiu/thinking-fixed-branch
修改了thinking参数传入的问题,并且添加了文档
2 parents 4d15404 + 554b170 commit 0dde544

5 files changed

Lines changed: 436 additions & 96 deletions

File tree

browser_utils/page_controller.py

Lines changed: 141 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
CLEAR_CHAT_BUTTON_SELECTOR, CLEAR_CHAT_CONFIRM_BUTTON_SELECTOR, OVERLAY_SELECTOR,
1515
PROMPT_TEXTAREA_SELECTOR, RESPONSE_CONTAINER_SELECTOR, RESPONSE_TEXT_SELECTOR,
1616
EDIT_MESSAGE_BUTTON_SELECTOR,USE_URL_CONTEXT_SELECTOR,UPLOAD_BUTTON_SELECTOR,
17-
SET_THINKING_BUDGET_TOGGLE_SELECTOR, THINKING_BUDGET_INPUT_SELECTOR,
17+
ENABLE_THINKING_MODE_TOGGLE_SELECTOR, SET_THINKING_BUDGET_TOGGLE_SELECTOR, THINKING_BUDGET_INPUT_SELECTOR,
1818
GROUNDING_WITH_GOOGLE_SEARCH_TOGGLE_SELECTOR
1919
)
2020
from config import (
@@ -25,6 +25,7 @@
2525
from models import ClientDisconnectedError
2626
from .operations import save_error_snapshot, _wait_for_response_completion, _get_final_response_content
2727
from .initialization import enable_temporary_chat_mode
28+
from .thinking_normalizer import normalize_reasoning_effort, format_directive_log
2829

2930
class PageController:
3031
"""封装了与AI Studio页面交互的所有操作。"""
@@ -80,68 +81,70 @@ async def adjust_parameters(self, request_params: Dict[str, Any], page_params_ca
8081
await self._adjust_google_search(request_params, check_client_disconnected)
8182

8283
async def _handle_thinking_budget(self, request_params: Dict[str, Any], check_client_disconnected: Callable):
83-
"""处理思考预算的调整逻辑。"""
84-
reasoning_effort = request_params.get('reasoning_effort')
84+
"""处理思考模式和预算的调整逻辑。
8585
86-
# 检查用户是否明确禁用了思考预算
87-
should_disable_budget = isinstance(reasoning_effort, str) and reasoning_effort.lower() == 'none'
88-
89-
if should_disable_budget:
90-
self.logger.info(f"[{self.req_id}] 用户通过 reasoning_effort='none' 明确禁用思考预算。")
91-
await self._control_thinking_budget_toggle(should_be_checked=False, check_client_disconnected=check_client_disconnected)
92-
elif reasoning_effort is not None:
93-
# 用户指定了非 'none' 的值,则开启并设置
94-
self.logger.info(f"[{self.req_id}] 用户指定了 reasoning_effort: {reasoning_effort},将启用并设置思考预算。")
95-
await self._control_thinking_budget_toggle(should_be_checked=True, check_client_disconnected=check_client_disconnected)
96-
await self._adjust_thinking_budget(reasoning_effort, check_client_disconnected)
97-
else:
98-
# 用户未指定,根据默认配置
99-
self.logger.info(f"[{self.req_id}] 用户未指定 reasoning_effort,根据默认配置 ENABLE_THINKING_BUDGET: {ENABLE_THINKING_BUDGET}。")
100-
await self._control_thinking_budget_toggle(should_be_checked=ENABLE_THINKING_BUDGET, check_client_disconnected=check_client_disconnected)
101-
if ENABLE_THINKING_BUDGET:
102-
# 如果默认开启,则使用默认值
103-
await self._adjust_thinking_budget(None, check_client_disconnected)
104-
105-
def _parse_thinking_budget(self, reasoning_effort: Optional[Any]) -> Optional[int]:
106-
"""从 reasoning_effort 解析出 token_budget。"""
107-
token_budget = None
108-
if reasoning_effort is None:
109-
token_budget = DEFAULT_THINKING_BUDGET
110-
self.logger.info(f"[{self.req_id}] 'reasoning_effort' 为空,使用默认思考预算: {token_budget}")
111-
elif isinstance(reasoning_effort, int):
112-
token_budget = reasoning_effort
113-
elif isinstance(reasoning_effort, str):
114-
if reasoning_effort.lower() == 'none':
115-
token_budget = DEFAULT_THINKING_BUDGET
116-
self.logger.info(f"[{self.req_id}] 'reasoning_effort' 为 'none' 字符串,使用默认思考预算: {token_budget}")
117-
else:
118-
effort_map = {
119-
"low": 1000,
120-
"medium": 8000,
121-
"high": 24000
122-
}
123-
token_budget = effort_map.get(reasoning_effort.lower())
124-
if token_budget is None:
125-
try:
126-
token_budget = int(reasoning_effort)
127-
except (ValueError, TypeError):
128-
pass # token_budget remains None
129-
130-
if token_budget is None:
131-
self.logger.warning(f"[{self.req_id}] 无法从 '{reasoning_effort}' (类型: {type(reasoning_effort)}) 解析出有效的 token_budget。")
86+
使用归一化模块将 reasoning_effort 转换为标准指令,然后根据指令控制:
87+
1. 主思考开关(总开关)
88+
2. 手动预算开关
89+
3. 预算值输入框
90+
"""
91+
reasoning_effort = request_params.get('reasoning_effort')
13292

133-
return token_budget
93+
# 使用归一化模块标准化参数
94+
directive = normalize_reasoning_effort(reasoning_effort)
95+
self.logger.info(f"[{self.req_id}] 思考模式指令: {format_directive_log(directive)}")
13496

135-
async def _adjust_thinking_budget(self, reasoning_effort: Optional[Any], check_client_disconnected: Callable):
136-
"""根据 reasoning_effort 调整思考预算。"""
137-
self.logger.info(f"[{self.req_id}] 检查并调整思考预算,输入值: {reasoning_effort}")
138-
139-
token_budget = self._parse_thinking_budget(reasoning_effort)
97+
# 场景1: 关闭思考模式
98+
if not directive.thinking_enabled:
99+
self.logger.info(f"[{self.req_id}] 尝试关闭主思考开关...")
100+
success = await self._control_thinking_mode_toggle(
101+
should_be_enabled=False,
102+
check_client_disconnected=check_client_disconnected
103+
)
140104

141-
if token_budget is None:
142-
self.logger.warning(f"[{self.req_id}] 无效的 reasoning_effort 值: '{reasoning_effort}'。跳过调整。")
105+
if not success:
106+
# 降级方案:主开关不可用,尝试将预算设为 0
107+
self.logger.warning(f"[{self.req_id}] 主思考开关不可用,使用降级方案:设置预算为 0")
108+
await self._control_thinking_budget_toggle(
109+
should_be_checked=True,
110+
check_client_disconnected=check_client_disconnected
111+
)
112+
await self._set_thinking_budget_value(0, check_client_disconnected)
143113
return
144114

115+
# 场景2和3: 开启思考模式
116+
self.logger.info(f"[{self.req_id}] 开启主思考开关...")
117+
await self._control_thinking_mode_toggle(
118+
should_be_enabled=True,
119+
check_client_disconnected=check_client_disconnected
120+
)
121+
122+
# 场景2: 开启思考,不限制预算
123+
if not directive.budget_enabled:
124+
self.logger.info(f"[{self.req_id}] 关闭手动预算限制...")
125+
await self._control_thinking_budget_toggle(
126+
should_be_checked=False,
127+
check_client_disconnected=check_client_disconnected
128+
)
129+
130+
# 场景3: 开启思考,限制预算
131+
else:
132+
self.logger.info(f"[{self.req_id}] 开启手动预算限制并设置预算值: {directive.budget_value} tokens")
133+
await self._control_thinking_budget_toggle(
134+
should_be_checked=True,
135+
check_client_disconnected=check_client_disconnected
136+
)
137+
await self._set_thinking_budget_value(directive.budget_value, check_client_disconnected)
138+
139+
async def _set_thinking_budget_value(self, token_budget: int, check_client_disconnected: Callable):
140+
"""设置思考预算的具体数值。
141+
142+
参数:
143+
token_budget: 预算token数量(由归一化模块计算得出)
144+
check_client_disconnected: 客户端断连检查回调
145+
"""
146+
self.logger.info(f"[{self.req_id}] 设置思考预算值: {token_budget} tokens")
147+
145148
budget_input_locator = self.page.locator(THINKING_BUDGET_INPUT_SELECTOR)
146149

147150
try:
@@ -266,9 +269,71 @@ async def _open_url_content(self,check_client_disconnected: Callable):
266269
if isinstance(e, ClientDisconnectedError):
267270
raise
268271

272+
async def _control_thinking_mode_toggle(self, should_be_enabled: bool, check_client_disconnected: Callable) -> bool:
273+
"""
274+
控制主思考开关(总开关),决定是否启用思考模式。
275+
276+
参数:
277+
should_be_enabled: 期望的开关状态(True=开启, False=关闭)
278+
check_client_disconnected: 客户端断开检测函数
279+
280+
返回:
281+
bool: 是否成功设置到期望状态(如果开关不存在或被禁用,返回False)
282+
"""
283+
toggle_selector = ENABLE_THINKING_MODE_TOGGLE_SELECTOR
284+
self.logger.info(f"[{self.req_id}] 控制主思考开关,期望状态: {'开启' if should_be_enabled else '关闭'}...")
285+
286+
try:
287+
toggle_locator = self.page.locator(toggle_selector)
288+
289+
# 等待元素可见(5秒超时)
290+
await expect_async(toggle_locator).to_be_visible(timeout=5000)
291+
await self._check_disconnect(check_client_disconnected, "主思考开关 - 元素可见后")
292+
293+
# 检查当前状态
294+
is_checked_str = await toggle_locator.get_attribute("aria-checked")
295+
current_state_is_enabled = is_checked_str == "true"
296+
self.logger.info(f"[{self.req_id}] 主思考开关当前状态: {is_checked_str} (是否开启: {current_state_is_enabled})")
297+
298+
# 如果当前状态与期望状态不同,点击切换
299+
if current_state_is_enabled != should_be_enabled:
300+
action = "开启" if should_be_enabled else "关闭"
301+
self.logger.info(f"[{self.req_id}] 主思考开关需要切换,正在点击以{action}思考模式...")
302+
303+
await toggle_locator.click(timeout=CLICK_TIMEOUT_MS)
304+
await self._check_disconnect(check_client_disconnected, f"主思考开关 - 点击{action}后")
305+
306+
# 等待状态更新
307+
await asyncio.sleep(0.5)
308+
309+
# 验证新状态
310+
new_state_str = await toggle_locator.get_attribute("aria-checked")
311+
new_state_is_enabled = new_state_str == "true"
312+
313+
if new_state_is_enabled == should_be_enabled:
314+
self.logger.info(f"[{self.req_id}] ✅ 主思考开关已成功{action}。新状态: {new_state_str}")
315+
return True
316+
else:
317+
self.logger.warning(f"[{self.req_id}] ⚠️ 主思考开关{action}后验证失败。期望: {should_be_enabled}, 实际: {new_state_str}")
318+
return False
319+
else:
320+
self.logger.info(f"[{self.req_id}] 主思考开关已处于期望状态,无需操作。")
321+
return True
322+
323+
except TimeoutError:
324+
self.logger.warning(f"[{self.req_id}] ⚠️ 主思考开关元素未找到或不可见(当前模型可能不支持思考模式)")
325+
return False
326+
except Exception as e:
327+
self.logger.error(f"[{self.req_id}] ❌ 操作主思考开关时发生错误: {e}")
328+
await save_error_snapshot(f"thinking_mode_toggle_error_{self.req_id}")
329+
if isinstance(e, ClientDisconnectedError):
330+
raise
331+
return False
332+
269333
async def _control_thinking_budget_toggle(self, should_be_checked: bool, check_client_disconnected: Callable):
270334
"""
271335
根据 should_be_checked 的值,控制 "Thinking Budget" 滑块开关的状态。
336+
(手动预算开关,控制是否限制思考预算)
272337
"""
273338
toggle_selector = SET_THINKING_BUDGET_TOGGLE_SELECTOR
274339
self.logger.info(f"[{self.req_id}] 控制 'Thinking Budget' 开关,期望状态: {'选中' if should_be_checked else '未选中'}...")
@@ -901,27 +966,28 @@ async def submit_prompt(self, prompt: str,image_list: List, check_client_disconn
901966
await self._check_disconnect(check_client_disconnected, "After Submit Button Enabled")
902967
await asyncio.sleep(0.3)
903968

904-
# 优先回车提交,其次按钮提交,最后组合键提交
905-
submitted_successfully = await self._try_enter_submit(prompt_textarea_locator, check_client_disconnected)
906-
if not submitted_successfully:
907-
self.logger.info(f"[{self.req_id}] 回车提交失败,尝试点击提交按钮...")
908-
button_clicked = False
909-
try:
910-
# 提交前再处理一次潜在对话框,避免按钮点击被拦截
911-
await self._handle_post_upload_dialog()
912-
await submit_button_locator.click(timeout=5000)
913-
self.logger.info(f"[{self.req_id}] ✅ 提交按钮点击完成。")
914-
button_clicked = True
915-
except Exception as click_err:
916-
self.logger.error(f"[{self.req_id}] ❌ 提交按钮点击失败: {click_err}")
917-
await save_error_snapshot(f"submit_button_click_fail_{self.req_id}")
918-
919-
if not button_clicked:
920-
self.logger.info(f"[{self.req_id}] 按钮提交失败,尝试组合键提交...")
969+
# 优先点击按钮提交,其次回车提交,最后组合键提交
970+
button_clicked = False
971+
try:
972+
self.logger.info(f"[{self.req_id}] 尝试点击提交按钮...")
973+
# 提交前再处理一次潜在对话框,避免按钮点击被拦截
974+
await self._handle_post_upload_dialog()
975+
await submit_button_locator.click(timeout=5000)
976+
self.logger.info(f"[{self.req_id}] ✅ 提交按钮点击完成。")
977+
button_clicked = True
978+
except Exception as click_err:
979+
self.logger.error(f"[{self.req_id}] ❌ 提交按钮点击失败: {click_err}")
980+
await save_error_snapshot(f"submit_button_click_fail_{self.req_id}")
981+
982+
if not button_clicked:
983+
self.logger.info(f"[{self.req_id}] 按钮提交失败,尝试回车键提交...")
984+
submitted_successfully = await self._try_enter_submit(prompt_textarea_locator, check_client_disconnected)
985+
if not submitted_successfully:
986+
self.logger.info(f"[{self.req_id}] 回车提交失败,尝试组合键提交...")
921987
combo_ok = await self._try_combo_submit(prompt_textarea_locator, check_client_disconnected)
922988
if not combo_ok:
923989
self.logger.error(f"[{self.req_id}] ❌ 组合键提交也失败。")
924-
raise Exception("Submit failed: Enter, Button, and Combo key all failed")
990+
raise Exception("Submit failed: Button, Enter, and Combo key all failed")
925991

926992
await self._check_disconnect(check_client_disconnected, "After Submit")
927993

0 commit comments

Comments
 (0)