Skip to content

Commit 08ad2a2

Browse files
committed
feat: Gemini 3 Pro thinking levels; stabilize budgets; selector/chat updates
1 parent b3a537b commit 08ad2a2

6 files changed

Lines changed: 1090 additions & 287 deletions

File tree

browser_utils/page_controller.py

Lines changed: 936 additions & 265 deletions
Large diffs are not rendered by default.

browser_utils/thinking_normalizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def _parse_budget_value(reasoning_effort: Any) -> Optional[int]:
131131
effort_map = {
132132
"low": 1000,
133133
"medium": 8000,
134-
"high": 24000,
134+
"high": 32768,
135135
}
136136

137137
# 先尝试预设值

config/selectors.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,50 @@
44
"""
55

66
# --- 输入相关选择器 ---
7-
PROMPT_TEXTAREA_SELECTOR = 'ms-prompt-input-wrapper ms-autosize-textarea textarea'
7+
PROMPT_TEXTAREA_SELECTOR = "ms-prompt-input-wrapper ms-autosize-textarea textarea"
88
INPUT_SELECTOR = PROMPT_TEXTAREA_SELECTOR
99
INPUT_SELECTOR2 = PROMPT_TEXTAREA_SELECTOR
1010

1111
# --- 按钮选择器 ---
1212
# 发送按钮:优先匹配 aria-label="Run" 的按钮;如页面结构变更,可退化到容器内的提交按钮。
1313
SUBMIT_BUTTON_SELECTOR = 'button[aria-label="Run"].run-button, ms-run-button button[type="submit"].run-button'
14-
CLEAR_CHAT_BUTTON_SELECTOR = 'button[data-test-clear="outside"][aria-label="New chat"]'
15-
CLEAR_CHAT_CONFIRM_BUTTON_SELECTOR = 'button.ms-button-primary:has-text("Discard and continue")'
14+
CLEAR_CHAT_BUTTON_SELECTOR = 'button[data-test-clear="outside"][aria-label="New chat"], button[aria-label="New chat"]'
15+
CLEAR_CHAT_CONFIRM_BUTTON_SELECTOR = (
16+
'button.ms-button-primary:has-text("Discard and continue")'
17+
)
1618
UPLOAD_BUTTON_SELECTOR = 'button[aria-label^="Insert assets"]'
1719

1820
# --- 响应相关选择器 ---
19-
RESPONSE_CONTAINER_SELECTOR = 'ms-chat-turn .chat-turn-container.model'
20-
RESPONSE_TEXT_SELECTOR = 'ms-cmark-node.cmark-node'
21+
RESPONSE_CONTAINER_SELECTOR = "ms-chat-turn .chat-turn-container.model"
22+
RESPONSE_TEXT_SELECTOR = "ms-cmark-node.cmark-node"
2123

2224
# --- 加载和状态选择器 ---
2325
LOADING_SPINNER_SELECTOR = 'button[aria-label="Run"].run-button svg .stoppable-spinner'
24-
OVERLAY_SELECTOR = '.mat-mdc-dialog-inner-container'
26+
OVERLAY_SELECTOR = ".mat-mdc-dialog-inner-container"
2527

2628
# --- 错误提示选择器 ---
27-
ERROR_TOAST_SELECTOR = 'div.toast.warning, div.toast.error'
29+
ERROR_TOAST_SELECTOR = "div.toast.warning, div.toast.error"
2830

2931
# --- 编辑相关选择器 ---
30-
EDIT_MESSAGE_BUTTON_SELECTOR = 'ms-chat-turn:last-child .actions-container button.toggle-edit-button'
31-
MESSAGE_TEXTAREA_SELECTOR = 'ms-chat-turn:last-child ms-text-chunk ms-autosize-textarea'
32+
EDIT_MESSAGE_BUTTON_SELECTOR = (
33+
"ms-chat-turn:last-child .actions-container button.toggle-edit-button"
34+
)
35+
MESSAGE_TEXTAREA_SELECTOR = "ms-chat-turn:last-child ms-text-chunk ms-autosize-textarea"
3236
FINISH_EDIT_BUTTON_SELECTOR = 'ms-chat-turn:last-child .actions-container button.toggle-edit-button[aria-label="Stop editing"]'
3337

3438
# --- 菜单和复制相关选择器 ---
35-
MORE_OPTIONS_BUTTON_SELECTOR = 'div.actions-container div ms-chat-turn-options div > button'
36-
COPY_MARKDOWN_BUTTON_SELECTOR = 'button.mat-mdc-menu-item:nth-child(4)'
39+
MORE_OPTIONS_BUTTON_SELECTOR = (
40+
"div.actions-container div ms-chat-turn-options div > button"
41+
)
42+
COPY_MARKDOWN_BUTTON_SELECTOR = "button.mat-mdc-menu-item:nth-child(4)"
3743
COPY_MARKDOWN_BUTTON_SELECTOR_ALT = 'div[role="menu"] button:has-text("Copy Markdown")'
3844

3945
# --- 设置相关选择器 ---
4046
MAX_OUTPUT_TOKENS_SELECTOR = 'input[aria-label="Maximum output tokens"]'
4147
STOP_SEQUENCE_INPUT_SELECTOR = 'input[aria-label="Add stop token"]'
42-
MAT_CHIP_REMOVE_BUTTON_SELECTOR = 'mat-chip-set mat-chip-row button[aria-label*="Remove"]'
48+
MAT_CHIP_REMOVE_BUTTON_SELECTOR = (
49+
'mat-chip-set mat-chip-row button[aria-label*="Remove"]'
50+
)
4351
TOP_P_INPUT_SELECTOR = 'ms-slider input[type="number"][max="1"]'
4452
TEMPERATURE_INPUT_SELECTOR = 'ms-slider input[type="number"][max="2"]'
4553
USE_URL_CONTEXT_SELECTOR = 'button[aria-label="Browse the url context"]'
@@ -48,9 +56,19 @@
4856
# 主思考开关:控制是否启用思考模式(总开关)
4957
ENABLE_THINKING_MODE_TOGGLE_SELECTOR = '[data-test-toggle="enable-thinking"] button'
5058
# 手动预算开关:控制是否手动限制思考预算
51-
SET_THINKING_BUDGET_TOGGLE_SELECTOR = '[data-test-toggle="manual-budget"] button'
59+
SET_THINKING_BUDGET_TOGGLE_SELECTOR = (
60+
'mat-slide-toggle[data-test-toggle="manual-budget"] button[role="switch"].mdc-switch, '
61+
'[data-test-toggle="manual-budget"] button[role="switch"].mdc-switch'
62+
)
5263
# 思考预算输入框
5364
THINKING_BUDGET_INPUT_SELECTOR = '[data-test-slider] input[type="number"]'
5465

66+
# 思考等级下拉
67+
THINKING_LEVEL_SELECT_SELECTOR = '[role="combobox"][aria-label="Thinking Level"], mat-select[aria-label="Thinking Level"], [role="combobox"][aria-label="Thinking level"], mat-select[aria-label="Thinking level"]'
68+
THINKING_LEVEL_OPTION_LOW_SELECTOR = '[role="listbox"][aria-label="Thinking Level"] [role="option"]:has-text("Low"), [role="listbox"][aria-label="Thinking level"] [role="option"]:has-text("Low")'
69+
THINKING_LEVEL_OPTION_HIGH_SELECTOR = '[role="listbox"][aria-label="Thinking Level"] [role="option"]:has-text("High"), [role="listbox"][aria-label="Thinking level"] [role="option"]:has-text("High")'
70+
5571
# --- Google Search Grounding ---
56-
GROUNDING_WITH_GOOGLE_SEARCH_TOGGLE_SELECTOR = 'div[data-test-id="searchAsAToolTooltip"] mat-slide-toggle button'
72+
GROUNDING_WITH_GOOGLE_SEARCH_TOGGLE_SELECTOR = (
73+
'div[data-test-id="searchAsAToolTooltip"] mat-slide-toggle button'
74+
)

docs/api-usage.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ response = requests.post(
179179
{
180180
"reasoning_effort": "low" // 1000 tokens
181181
// 或 "medium" (8000 tokens)
182-
// 或 "high" (24000 tokens)
182+
// 或 "high" (32768 tokens)
183183
}
184184
```
185185

index.html

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,37 @@ <h3>生成参数</h3>
240240
</div>
241241
</div>
242242

243+
<div class="settings-group">
244+
<label for="enableThinkingToggle">思考模式 (Thinking Mode):</label>
245+
<input type="checkbox" id="enableThinkingToggle">
246+
<div class="settings-description">
247+
关闭则不使用思考模式;开启时可选择“思考等级”或设置“思考预算”。
248+
</div>
249+
</div>
250+
251+
<div class="settings-group">
252+
<label for="thinkingLevelSelector">思考等级 (Thinking Level):</label>
253+
<select id="thinkingLevelSelector" class="settings-input">
254+
<option value="">未指定</option>
255+
<option value="low">Low</option>
256+
<option value="high">High</option>
257+
</select>
258+
<div class="settings-description">
259+
对于使用“思考等级”的模型(如 Gemini 3 Pro Preview),这里的选择将被优先使用。
260+
</div>
261+
</div>
262+
263+
<div class="settings-group">
264+
<label for="thinkingBudgetValue">思考预算 (Thinking Budget):</label>
265+
<div class="settings-slider-container">
266+
<input type="range" id="thinkingBudgetSlider" class="settings-slider" min="128" max="32768" step="1" value="8192">
267+
<input type="number" id="thinkingBudgetValue" class="settings-number" min="128" max="32768" step="1" value="8192">
268+
</div>
269+
<div class="settings-description">
270+
对于使用预算的模型,设定具体的思考 Token 数。若同时指定“思考等级”,将优先使用等级。
271+
</div>
272+
</div>
273+
243274
<div class="settings-group">
244275
<label for="stopSequences">停止序列 (Stop Sequences):</label>
245276
<input type="text" id="stopSequences" class="settings-input" placeholder="用逗号分隔多个停止序列">
@@ -250,6 +281,15 @@ <h3>生成参数</h3>
250281
</div>
251282
</div>
252283

284+
<div class="info-card">
285+
<h3>工具</h3>
286+
<div class="settings-group">
287+
<label for="enableGoogleSearchToggle">Grounding with Google Search</label>
288+
<input type="checkbox" id="enableGoogleSearchToggle">
289+
<div class="settings-description">启用搜索作为工具以改进事实性。</div>
290+
</div>
291+
</div>
292+
253293
<div class="info-card">
254294
<h3>设置保存状态</h3>
255295
<div id="settings-status" class="settings-status">
@@ -285,4 +325,4 @@ <h3>设置保存状态</h3>
285325
<script src="webui.js" defer></script>
286326
</body>
287327

288-
</html>
328+
</html>

webui.js

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ let chatbox, userInput, sendButton, clearButton, sidebarPanel, toggleSidebarButt
77
temperatureValue, maxOutputTokensSlider, maxOutputTokensValue, topPSlider,
88
topPValue, stopSequencesInput, saveModelSettingsButton, resetModelSettingsButton,
99
settingsStatusElement, apiKeyStatus, newApiKeyInput, toggleApiKeyVisibilityButton,
10-
testApiKeyButton, apiKeyList;
10+
testApiKeyButton, apiKeyList,
11+
enableThinkingToggle, thinkingLevelSelector, thinkingBudgetSlider, thinkingBudgetValue,
12+
enableGoogleSearchToggle;
1113

1214
function initializeDOMReferences() {
1315
chatbox = document.getElementById('chatbox');
@@ -44,6 +46,14 @@ function initializeDOMReferences() {
4446
resetModelSettingsButton = document.getElementById('resetModelSettingsButton');
4547
settingsStatusElement = document.getElementById('settings-status');
4648

49+
enableThinkingToggle = document.getElementById('enableThinkingToggle');
50+
thinkingLevelSelector = document.getElementById('thinkingLevelSelector');
51+
thinkingBudgetSlider = document.getElementById('thinkingBudgetSlider');
52+
thinkingBudgetValue = document.getElementById('thinkingBudgetValue');
53+
54+
enableGoogleSearchToggle = document.getElementById('enableGoogleSearchToggle');
55+
56+
4757
// API密钥管理元素
4858
apiKeyStatus = document.getElementById('apiKeyStatus');
4959
newApiKeyInput = document.getElementById('newApiKey');
@@ -52,6 +62,29 @@ function initializeDOMReferences() {
5262
apiKeyList = document.getElementById('apiKeyList');
5363
}
5464

65+
/**
 * Report whether a model id names a model that is configured through
 * discrete "thinking levels" rather than a numeric thinking budget.
 *
 * The id is lower-cased and must contain both "gemini-3" and "pro".
 *
 * @param {*} modelId - Model identifier; falsy values are treated as ''.
 * @returns {boolean} true when both tokens are present; false otherwise,
 *   including when the id cannot be converted to a string.
 */
function modelUsesThinkingLevel(modelId) {
    let normalizedId;
    try {
        // String() can throw for exotic values (e.g. objects without a
        // usable toString), so only the conversion is guarded.
        normalizedId = String(modelId || '').toLowerCase();
    } catch (err) {
        return false;
    }
    const requiredTokens = ['gemini-3', 'pro'];
    return requiredTokens.every((token) => normalizedId.includes(token));
}
73+
74+
/**
 * Derive the `reasoning_effort` request value from the saved model settings.
 *
 * Resolution order:
 *   1. Thinking disabled                      -> 0
 *   2. Currently selected model uses thinking
 *      levels and the level is low/high       -> that level string
 *   3. A positive integer thinking budget     -> the budget number
 *   4. Anything else, or any error            -> 'none'
 *
 * @param {Object} settings - Reads `enableThinking`, `thinkingLevel`
 *   and `thinkingBudget`.
 * @returns {number|string} 0, 'low', 'high', a positive integer, or 'none'.
 */
function computeReasoningEffort(settings) {
    const FALLBACK = 'none';
    try {
        if (!settings.enableThinking) {
            return 0;
        }

        const level = (settings.thinkingLevel || '').toLowerCase();
        const budgetTokens = parseInt(settings.thinkingBudget);
        // Evaluated unconditionally so that any failure while reading the
        // selected model falls through to the catch below.
        const levelBasedModel = modelUsesThinkingLevel(SELECTED_MODEL);

        if (levelBasedModel && ['low', 'high'].includes(level)) {
            return level;
        }

        const budgetIsUsable = !isNaN(budgetTokens) && budgetTokens > 0;
        return budgetIsUsable ? budgetTokens : FALLBACK;
    } catch (err) {
        return FALLBACK;
    }
}
87+
5588

5689
// --- Constants & Global Variables ---
5790
const API_URL = '/v1/chat/completions';
@@ -76,7 +109,11 @@ let modelSettings = {
76109
temperature: -1,
77110
maxOutputTokens: -1,
78111
topP: -1,
79-
stopSequences: ""
112+
stopSequences: "",
113+
enableThinking: false,
114+
thinkingBudget: 8192,
115+
thinkingLevel: "",
116+
enableGoogleSearch: false
80117
};
81118

82119
// --- Helper Functions ---
@@ -478,11 +515,18 @@ async function sendMessage() {
478515
max_output_tokens: modelSettings.maxOutputTokens,
479516
top_p: modelSettings.topP,
480517
};
518+
requestBody.reasoning_effort = computeReasoningEffort(modelSettings);
519+
const tools = [];
520+
if (enableGoogleSearchToggle && enableGoogleSearchToggle.checked) tools.push({ google_search_retrieval: {} });
521+
if (tools.length > 0) {
522+
requestBody.tools = tools;
523+
requestBody.tool_choice = 'auto';
524+
}
481525
if (modelSettings.stopSequences) {
482526
const stopArray = modelSettings.stopSequences.split(',').map(seq => seq.trim()).filter(seq => seq.length > 0);
483527
if (stopArray.length > 0) requestBody.stop = stopArray;
484528
}
485-
addLogEntry(`[信息] 发送请求,模型: ${SELECTED_MODEL}, 温度: ${requestBody.temperature ?? '默认'}, 最大Token: ${requestBody.max_output_tokens ?? '默认'}, Top P: ${requestBody.top_p ?? '默认'}`);
529+
addLogEntry(`[信息] 发送请求,模型: ${SELECTED_MODEL}, 温度: ${requestBody.temperature ?? '默认'}, 最大Token: ${requestBody.max_output_tokens ?? '默认'}, Top P: ${requestBody.top_p ?? '默认'}, 思考参数: ${String(requestBody.reasoning_effort)}, 工具: ${JSON.stringify(requestBody.tools || [])}`);
486530

487531
// 获取API密钥进行认证
488532
const apiKey = await getValidApiKey();
@@ -888,6 +932,12 @@ function updateModelSettingsUI() {
888932
maxOutputTokensSlider.value = maxOutputTokensValue.value = modelSettings.maxOutputTokens;
889933
topPSlider.value = topPValue.value = modelSettings.topP;
890934
stopSequencesInput.value = modelSettings.stopSequences;
935+
if (enableThinkingToggle) enableThinkingToggle.checked = !!modelSettings.enableThinking;
936+
if (thinkingLevelSelector) thinkingLevelSelector.value = modelSettings.thinkingLevel || "";
937+
if (thinkingBudgetSlider) thinkingBudgetSlider.value = modelSettings.thinkingBudget;
938+
if (thinkingBudgetValue) thinkingBudgetValue.value = modelSettings.thinkingBudget;
939+
if (enableGoogleSearchToggle) enableGoogleSearchToggle.checked = !!modelSettings.enableGoogleSearch;
940+
891941
}
892942

893943
function saveModelSettings() {
@@ -896,6 +946,14 @@ function saveModelSettings() {
896946
modelSettings.maxOutputTokens = parseInt(maxOutputTokensValue.value);
897947
modelSettings.topP = parseFloat(topPValue.value);
898948
modelSettings.stopSequences = stopSequencesInput.value.trim();
949+
if (enableThinkingToggle) modelSettings.enableThinking = !!enableThinkingToggle.checked;
950+
if (thinkingLevelSelector) modelSettings.thinkingLevel = (thinkingLevelSelector.value || "").toLowerCase();
951+
if (thinkingBudgetValue) {
952+
const budgetVal = parseInt(thinkingBudgetValue.value);
953+
modelSettings.thinkingBudget = isNaN(budgetVal) ? 8192 : budgetVal;
954+
}
955+
if (enableGoogleSearchToggle) modelSettings.enableGoogleSearch = !!enableGoogleSearchToggle.checked;
956+
899957

900958
try {
901959
localStorage.setItem(MODEL_SETTINGS_KEY, JSON.stringify(modelSettings));
@@ -930,6 +988,11 @@ function resetModelSettings() {
930988

931989
updateControlsForSelectedModel(); // This applies model-specific defaults to UI and modelSettings object
932990

991+
modelSettings.enableThinking = false;
992+
modelSettings.thinkingLevel = "";
993+
modelSettings.thinkingBudget = 8192;
994+
updateModelSettingsUI();
995+
933996
try {
934997
// Save these model-specific defaults (which are now in modelSettings) to localStorage
935998
// This makes the "reset" effectively a "reset to this model's defaults and save that"
@@ -1033,12 +1096,23 @@ function bindEventListeners() {
10331096
topPSlider.addEventListener('input', () => topPValue.value = topPSlider.value);
10341097
topPValue.addEventListener('input', () => { if (!isNaN(parseFloat(topPValue.value))) topPSlider.value = parseFloat(topPValue.value); });
10351098

1099+
if (thinkingBudgetSlider && thinkingBudgetValue) {
1100+
thinkingBudgetSlider.addEventListener('input', () => thinkingBudgetValue.value = thinkingBudgetSlider.value);
1101+
thinkingBudgetValue.addEventListener('input', () => { const v = parseInt(thinkingBudgetValue.value); if (!isNaN(v)) thinkingBudgetSlider.value = v; });
1102+
}
1103+
if (enableThinkingToggle) enableThinkingToggle.addEventListener('change', () => showSettingsStatus("思考模式设置已更新", false));
1104+
if (thinkingLevelSelector) thinkingLevelSelector.addEventListener('change', () => showSettingsStatus("思考等级已更新", false));
1105+
10361106
saveModelSettingsButton.addEventListener('click', saveModelSettings);
10371107
resetModelSettingsButton.addEventListener('click', resetModelSettings);
10381108

1109+
// Tools controls syncing
1110+
if (enableGoogleSearchToggle) enableGoogleSearchToggle.addEventListener('change', () => showSettingsStatus("Google Search 工具已更新", false));
1111+
1112+
10391113
const debouncedSave = debounce(saveModelSettings, 1000);
1040-
[systemPromptInput, temperatureValue, maxOutputTokensValue, topPValue, stopSequencesInput].forEach(
1041-
element => element.addEventListener('input', debouncedSave) // Use 'input' for more responsive auto-save
1114+
[systemPromptInput, temperatureValue, maxOutputTokensValue, topPValue, stopSequencesInput, thinkingBudgetValue, thinkingLevelSelector, enableThinkingToggle, enableGoogleSearchToggle].forEach(
1115+
element => element && element.addEventListener('input', debouncedSave)
10421116
);
10431117
}
10441118

0 commit comments

Comments
 (0)