Skip to content

Commit 727e462

Browse files
committed
feat(config): 修复思考预算功能并增强启动逻辑
本次提交包含多项修复和改进,主要解决了“Set thinking budget”功能在特定配置下被意外触发以及相关UI交互失败的问题。

主要修改如下:

- **修复启动加载顺序**: 调整了 `launch_camoufox.py` 的代码,确保 `.env` 环境配置文件在导入任何应用模块之前被加载,解决了因加载顺序不当导致配置失效的问题。
- **增强服务独立性**: 为 `server.py` 添加了 `load_dotenv()` 调用,使其在独立运行时也能正确加载环境配置,提高了程序的健壮性。
- **修正思考预算逻辑**: 修改了 `browser_utils/page_controller.py` 中的 `_handle_thinking_budget` 函数,正确处理 `reasoning_effort: 'none'` 的情况,将其视为禁用“Set thinking budget”功能。
- **改进工具面板交互**: 将展开高级设置工具面板的逻辑从特定功能的判断中独立出来,确保在需要调整任何高级设置(如思考预算、URL上下文等)前,面板都处于展开状态,避免了因面板折叠导致的元素定位失败。
- **更新选择器**: 修正了 `config/selectors.py` 中 `THINKING_BUDGET_INPUT_SELECTOR` 的 XPath,使其能够正确定位到嵌套在额外 `div` 中的思考预算输入框。
1 parent 07627d4 commit 727e462

4 files changed

Lines changed: 235 additions & 195 deletions

File tree

browser_utils/page_controller.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ async def adjust_parameters(self, request_params: Dict[str, Any], page_params_ca
6262
await self._adjust_top_p(top_p_to_set, check_client_disconnected)
6363
await self._check_disconnect(check_client_disconnected, "End Parameter Adjustment")
6464

65+
# 确保工具面板已展开,以便调整高级设置
66+
await self._ensure_tools_panel_expanded(check_client_disconnected)
67+
6568
# 调整URL CONTEXT
6669
if ENABLE_URL_CONTEXT:
6770
await self._open_url_content(check_client_disconnected)
@@ -77,9 +80,16 @@ async def adjust_parameters(self, request_params: Dict[str, Any], page_params_ca
7780
async def _handle_thinking_budget(self, request_params: Dict[str, Any], check_client_disconnected: Callable):
7881
"""处理思考预算的调整逻辑。"""
7982
reasoning_effort = request_params.get('reasoning_effort')
80-
if reasoning_effort is not None:
81-
# 用户指定了,则开启并设置
82-
self.logger.info(f"[{self.req_id}] 用户指定了 reasoning_effort: {reasoning_effort}。")
83+
84+
# 检查用户是否明确禁用了思考预算
85+
should_disable_budget = isinstance(reasoning_effort, str) and reasoning_effort.lower() == 'none'
86+
87+
if should_disable_budget:
88+
self.logger.info(f"[{self.req_id}] 用户通过 reasoning_effort='none' 明确禁用思考预算。")
89+
await self._control_thinking_budget_toggle(should_be_checked=False, check_client_disconnected=check_client_disconnected)
90+
elif reasoning_effort is not None:
91+
# 用户指定了非 'none' 的值,则开启并设置
92+
self.logger.info(f"[{self.req_id}] 用户指定了 reasoning_effort: {reasoning_effort},将启用并设置思考预算。")
8393
await self._control_thinking_budget_toggle(should_be_checked=True, check_client_disconnected=check_client_disconnected)
8494
await self._adjust_thinking_budget(reasoning_effort, check_client_disconnected)
8595
else:
@@ -209,27 +219,50 @@ async def _adjust_google_search(self, request_params: Dict[str, Any], check_clie
209219
if isinstance(e, ClientDisconnectedError):
210220
raise
211221

212-
async def _open_url_content(self,check_client_disconnected: Callable):
222+
async def _ensure_tools_panel_expanded(self, check_client_disconnected: Callable):
223+
"""确保包含高级工具(URL上下文、思考预算等)的面板是展开的。"""
224+
self.logger.info(f"[{self.req_id}] 检查并确保工具面板已展开...")
213225
try:
214226
collapse_tools_locator = self.page.locator('button[aria-label="Expand or collapse tools"]')
227+
await expect_async(collapse_tools_locator).to_be_visible(timeout=5000)
228+
215229
grandparent_locator = collapse_tools_locator.locator("xpath=../..")
230+
class_string = await grandparent_locator.get_attribute("class", timeout=3000)
216231

217-
# 3. 获取祖父级元素的 class 属性值
218-
# get_attribute 返回一个包含所有 class 的字符串,例如 "menu dropdown active"
219-
class_string = await grandparent_locator.get_attribute("class")
220-
221-
# 4. 在 Python 中进行判断
222-
# 确保 class_string 不是 None,并且 'expanded' 是一个独立的 class
223232
if class_string and "expanded" not in class_string.split():
233+
self.logger.info(f"[{self.req_id}] 工具面板未展开,正在点击以展开...")
224234
await collapse_tools_locator.click(timeout=CLICK_TIMEOUT_MS)
225-
await asyncio.sleep(0.5)
235+
await self._check_disconnect(check_client_disconnected, "展开工具面板后")
236+
# 等待展开动画完成
237+
await expect_async(grandparent_locator).to_have_class(re.compile(r'.*expanded.*'), timeout=5000)
238+
self.logger.info(f"[{self.req_id}] ✅ 工具面板已成功展开。")
239+
else:
240+
self.logger.info(f"[{self.req_id}] 工具面板已处于展开状态。")
241+
except Exception as e:
242+
self.logger.error(f"[{self.req_id}] ❌ 展开工具面板时发生错误: {e}")
243+
# 即使出错,也继续尝试执行后续操作,但记录错误
244+
if isinstance(e, ClientDisconnectedError):
245+
raise
246+
247+
async def _open_url_content(self,check_client_disconnected: Callable):
248+
"""仅负责打开 URL Context 开关,前提是面板已展开。"""
249+
try:
250+
self.logger.info(f"[{self.req_id}] 检查并启用 URL Context 开关...")
226251
use_url_content_selector = self.page.locator(USE_URL_CONTEXT_SELECTOR)
252+
await expect_async(use_url_content_selector).to_be_visible(timeout=5000)
253+
227254
is_checked = await use_url_content_selector.get_attribute("aria-checked")
228255
if "false" == is_checked:
256+
self.logger.info(f"[{self.req_id}] URL Context 开关未开启,正在点击以开启...")
229257
await use_url_content_selector.click(timeout=CLICK_TIMEOUT_MS)
230-
await self._check_disconnect(check_client_disconnected, "点击URLCONTEXT")
258+
await self._check_disconnect(check_client_disconnected, "点击URLCONTEXT后")
259+
self.logger.info(f"[{self.req_id}] ✅ URL Context 开关已点击。")
260+
else:
261+
self.logger.info(f"[{self.req_id}] URL Context 开关已处于开启状态。")
231262
except Exception as e:
232-
self.logger.error(f"[{self.req_id}] ❌ 操作USE_URL_CONTEXT_SELECTOR时发生错误:{e}。")
263+
self.logger.error(f"[{self.req_id}] ❌ 操作 USE_URL_CONTEXT_SELECTOR 时发生错误:{e}。")
264+
if isinstance(e, ClientDisconnectedError):
265+
raise
233266

234267
async def _control_thinking_budget_toggle(self, should_be_checked: bool, check_client_disconnected: Callable):
235268
"""

config/selectors.py

Lines changed: 48 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,49 @@
1-
"""
2-
CSS选择器配置模块
3-
包含所有用于页面元素定位的CSS选择器
4-
"""
5-
6-
# --- 输入相关选择器 ---
7-
PROMPT_TEXTAREA_SELECTOR = 'ms-prompt-input-wrapper ms-autosize-textarea textarea'
8-
INPUT_SELECTOR = PROMPT_TEXTAREA_SELECTOR
9-
INPUT_SELECTOR2 = PROMPT_TEXTAREA_SELECTOR
10-
11-
# --- 按钮选择器 ---
12-
SUBMIT_BUTTON_SELECTOR = 'button[aria-label="Run"].run-button'
13-
CLEAR_CHAT_BUTTON_SELECTOR = 'button[data-test-clear="outside"][aria-label="Clear chat"]'
14-
CLEAR_CHAT_CONFIRM_BUTTON_SELECTOR = 'button.mdc-button:has-text("Continue")'
15-
UPLOAD_BUTTON_SELECTOR = 'button[aria-label="Upload File"]'
16-
17-
# --- 响应相关选择器 ---
18-
RESPONSE_CONTAINER_SELECTOR = 'ms-chat-turn .chat-turn-container.model'
19-
RESPONSE_TEXT_SELECTOR = 'ms-cmark-node.cmark-node'
20-
21-
# --- 加载和状态选择器 ---
22-
LOADING_SPINNER_SELECTOR = 'button[aria-label="Run"].run-button svg .stoppable-spinner'
23-
OVERLAY_SELECTOR = 'div.cdk-overlay-backdrop'
24-
25-
# --- 错误提示选择器 ---
26-
ERROR_TOAST_SELECTOR = 'div.toast.warning, div.toast.error'
27-
28-
# --- 编辑相关选择器 ---
29-
EDIT_MESSAGE_BUTTON_SELECTOR = 'ms-chat-turn:last-child .actions-container button.toggle-edit-button'
30-
MESSAGE_TEXTAREA_SELECTOR = 'ms-chat-turn:last-child ms-text-chunk ms-autosize-textarea'
31-
FINISH_EDIT_BUTTON_SELECTOR = 'ms-chat-turn:last-child .actions-container button.toggle-edit-button[aria-label="Stop editing"]'
32-
33-
# --- 菜单和复制相关选择器 ---
34-
MORE_OPTIONS_BUTTON_SELECTOR = 'div.actions-container div ms-chat-turn-options div > button'
35-
COPY_MARKDOWN_BUTTON_SELECTOR = 'button.mat-mdc-menu-item:nth-child(4)'
36-
COPY_MARKDOWN_BUTTON_SELECTOR_ALT = 'div[role="menu"] button:has-text("Copy Markdown")'
37-
38-
# --- 设置相关选择器 ---
39-
MAX_OUTPUT_TOKENS_SELECTOR = 'input[aria-label="Maximum output tokens"]'
40-
STOP_SEQUENCE_INPUT_SELECTOR = 'input[aria-label="Add stop token"]'
41-
MAT_CHIP_REMOVE_BUTTON_SELECTOR = 'mat-chip-set mat-chip-row button[aria-label*="Remove"]'
42-
TOP_P_INPUT_SELECTOR = 'div.settings-item-column:has(h3:text-is("Top P")) input[type="number"].slider-input'
43-
TEMPERATURE_INPUT_SELECTOR = 'div[data-test-id="temperatureSliderContainer"] input[type="number"].slider-input'
44-
USE_URL_CONTEXT_SELECTOR = 'button[aria-label="Browse the url context"]'
45-
SET_THINKING_BUDGET_TOGGLE_SELECTOR = 'button[aria-label="Toggle thinking budget between auto and manual"]'
46-
# Thinking budget slider input
47-
THINKING_BUDGET_INPUT_SELECTOR = 'xpath=//div[contains(@class, "settings-item") and .//p[normalize-space()="Set thinking budget"]]/following-sibling::div[contains(@class, "item-input")]//input[@type="number"]'
48-
# --- Google Search Grounding ---
1+
"""
2+
CSS选择器配置模块
3+
包含所有用于页面元素定位的CSS选择器
4+
"""
5+
6+
# --- 输入相关选择器 ---
7+
PROMPT_TEXTAREA_SELECTOR = 'ms-prompt-input-wrapper ms-autosize-textarea textarea'
8+
INPUT_SELECTOR = PROMPT_TEXTAREA_SELECTOR
9+
INPUT_SELECTOR2 = PROMPT_TEXTAREA_SELECTOR
10+
11+
# --- 按钮选择器 ---
12+
SUBMIT_BUTTON_SELECTOR = 'button[aria-label="Run"].run-button'
13+
CLEAR_CHAT_BUTTON_SELECTOR = 'button[data-test-clear="outside"][aria-label="Clear chat"]'
14+
CLEAR_CHAT_CONFIRM_BUTTON_SELECTOR = 'button.mdc-button:has-text("Continue")'
15+
UPLOAD_BUTTON_SELECTOR = 'button[aria-label="Upload File"]'
16+
17+
# --- 响应相关选择器 ---
18+
RESPONSE_CONTAINER_SELECTOR = 'ms-chat-turn .chat-turn-container.model'
19+
RESPONSE_TEXT_SELECTOR = 'ms-cmark-node.cmark-node'
20+
21+
# --- 加载和状态选择器 ---
22+
LOADING_SPINNER_SELECTOR = 'button[aria-label="Run"].run-button svg .stoppable-spinner'
23+
OVERLAY_SELECTOR = 'div.cdk-overlay-backdrop'
24+
25+
# --- 错误提示选择器 ---
26+
ERROR_TOAST_SELECTOR = 'div.toast.warning, div.toast.error'
27+
28+
# --- 编辑相关选择器 ---
29+
EDIT_MESSAGE_BUTTON_SELECTOR = 'ms-chat-turn:last-child .actions-container button.toggle-edit-button'
30+
MESSAGE_TEXTAREA_SELECTOR = 'ms-chat-turn:last-child ms-text-chunk ms-autosize-textarea'
31+
FINISH_EDIT_BUTTON_SELECTOR = 'ms-chat-turn:last-child .actions-container button.toggle-edit-button[aria-label="Stop editing"]'
32+
33+
# --- 菜单和复制相关选择器 ---
34+
MORE_OPTIONS_BUTTON_SELECTOR = 'div.actions-container div ms-chat-turn-options div > button'
35+
COPY_MARKDOWN_BUTTON_SELECTOR = 'button.mat-mdc-menu-item:nth-child(4)'
36+
COPY_MARKDOWN_BUTTON_SELECTOR_ALT = 'div[role="menu"] button:has-text("Copy Markdown")'
37+
38+
# --- 设置相关选择器 ---
39+
MAX_OUTPUT_TOKENS_SELECTOR = 'input[aria-label="Maximum output tokens"]'
40+
STOP_SEQUENCE_INPUT_SELECTOR = 'input[aria-label="Add stop token"]'
41+
MAT_CHIP_REMOVE_BUTTON_SELECTOR = 'mat-chip-set mat-chip-row button[aria-label*="Remove"]'
42+
TOP_P_INPUT_SELECTOR = 'div.settings-item-column:has(h3:text-is("Top P")) input[type="number"].slider-input'
43+
TEMPERATURE_INPUT_SELECTOR = 'div[data-test-id="temperatureSliderContainer"] input[type="number"].slider-input'
44+
USE_URL_CONTEXT_SELECTOR = 'button[aria-label="Browse the url context"]'
45+
SET_THINKING_BUDGET_TOGGLE_SELECTOR = 'button[aria-label="Toggle thinking budget between auto and manual"]'
46+
# Thinking budget slider input
47+
THINKING_BUDGET_INPUT_SELECTOR = '//div[contains(@class, "settings-item") and .//p[normalize-space()="Set thinking budget"]]/following-sibling::div//input[@type="number"]'
48+
# --- Google Search Grounding ---
4949
GROUNDING_WITH_GOOGLE_SEARCH_TOGGLE_SELECTOR = 'div[data-test-id="searchAsAToolTooltip"] mat-slide-toggle button'

launch_camoufox.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020
import shutil
2121

2222
# --- 新的导入 ---
23-
import uvicorn
24-
from server import app # 从 server.py 导入 FastAPI app 对象
2523
from dotenv import load_dotenv
2624

27-
# 加载 .env 文件
25+
# 提前加载 .env 文件,以确保后续导入的模块能获取到正确的环境变量
2826
load_dotenv()
27+
28+
import uvicorn
29+
from server import app # 从 server.py 导入 FastAPI app 对象
2930
# -----------------
3031

3132
# 尝试导入 launch_server (用于内部启动模式,模拟 Camoufox 行为)

0 commit comments

Comments
 (0)