|
1 | 1 | import fs from "node:fs"; |
2 | 2 | import path from "node:path"; |
3 | | -import type { Page, Locator } from "playwright"; |
| 3 | +import type { Page, Locator } from "patchright"; |
4 | 4 | import type { SelectorReport, ToolContent } from "./types.js"; |
5 | 5 |
|
6 | 6 | /** |
@@ -139,3 +139,147 @@ export async function screenshotOnError( |
/**
 * Pause for a fixed duration.
 *
 * @param ms delay in milliseconds before the returned promise settles
 * @returns a promise that resolves (with no value) once the timer fires
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
| 142 | + |
/**
 * Detect rate limiting or security challenges after a navigation.
 *
 * Two checks run in order:
 *  1. URL-based: a URL containing `/checkpoint` or `authwall` is treated as a
 *     security challenge.
 *  2. Content-based: only runs on error-shaped pages (no `<main>` element and
 *     non-empty body text shorter than 2000 characters). This guards against
 *     false positives on real content pages that incidentally contain phrases
 *     like "slow down".
 *
 * Failures while reading the page during the content check are swallowed: the
 * heuristic is best-effort and must not turn an unreadable page into an error.
 *
 * @param page the page to inspect after navigation
 * @throws Error when a checkpoint/authwall URL or a rate-limit message is found
 */
export async function detectRateLimit(page: Page): Promise<void> {
  const url = page.url();

  // URL-based: security checkpoints always redirect to known paths
  if (url.includes("/checkpoint") || url.includes("authwall")) {
    throw new Error(
      `Rate limit or security challenge detected at: ${url}. ` +
        "Wait a few minutes before retrying. If this persists, run `browserkit login <site>` to re-authenticate."
    );
  }

  // Content-based: decide inside the try, throw outside it. This keeps the
  // "swallow page read errors" policy from depending on the wording of the
  // error message we raise ourselves.
  const rateLimitPhrases = ["too many requests", "rate limit", "slow down", "try again later"];
  let rateLimited = false;
  try {
    const hasMain = (await page.locator("main").count()) > 0;
    if (!hasMain) {
      // A failed or slow body read is treated as "no text", i.e. nothing detected.
      const bodyText = await page
        .locator("body")
        .innerText({ timeout: 1000 })
        .catch(() => "");
      if (bodyText && bodyText.length < 2_000) {
        const lower = bodyText.toLowerCase();
        rateLimited = rateLimitPhrases.some((phrase) => lower.includes(phrase));
      }
    }
  } catch {
    // Page could not be inspected (closed, navigating, ...): skip the heuristic.
  }

  if (rateLimited) {
    throw new Error(
      `Rate limit message detected on page (${url}). ` +
        "Wait before retrying."
    );
  }
}
| 187 | + |
/**
 * Dismiss a popup modal that may be blocking content.
 *
 * Tries a fixed list of dismiss-button selectors in order and clicks the first
 * one that is currently visible, then pauses briefly before returning.
 * Any failure for a selector is swallowed and the next selector is tried.
 *
 * The visibility probe is instantaneous: `locator.isVisible()` does not wait,
 * and its `timeout` option is ignored by Playwright, so none is passed. The
 * click itself is bounded so that a visible-but-covered button cannot stall
 * this best-effort helper for the default action timeout.
 *
 * The artdeco selector is LinkedIn-specific but harmless on other sites.
 *
 * @param page the page to look for dismiss buttons on
 * @returns true if a dismiss button was clicked, false if none was found
 */
export async function dismissModals(page: Page): Promise<boolean> {
  const dismissSelectors = [
    'button[aria-label="Dismiss"]',
    'button[aria-label="Close"]',
    'button[aria-label="Dismiss dialog"]',
    "button.artdeco-modal__dismiss",
  ];
  // Upper bound for the click's actionability wait, in milliseconds.
  const clickTimeoutMs = 2000;

  for (const selector of dismissSelectors) {
    try {
      const btn = page.locator(selector).first();
      if (!(await btn.isVisible())) continue;

      await btn.click({ timeout: clickTimeoutMs });
      // Short pause after the click before reporting success.
      await sleep(400);
      return true;
    } catch {
      // try next selector
    }
  }
  return false;
}
| 218 | + |
/**
 * Repeatedly scroll the nearest scrollable ancestor of `anchorSelector` to its
 * bottom until its scroll height stops growing or `maxScrolls` is reached.
 *
 * Intended for pages with nested scrollable containers, where scrolling the
 * window does nothing because the scrolling element is not the window.
 *
 * The search starts at the anchor's parent and walks upward, stopping before
 * `document.body`. An ancestor qualifies when its computed `overflow-y` is
 * `auto` or `scroll` and its content is taller than its client height.
 *
 * @param anchorSelector CSS selector for any element inside the container
 * @param options.pauseMs ms to wait after each scroll before re-measuring (default: 1000)
 * @param options.maxScrolls maximum number of scroll attempts (default: 10)
 * @returns number of scrolls that produced new content, or -1 if the anchor or
 *          a scrollable container was not found
 */
export async function scrollContainer(
  page: Page,
  anchorSelector: string,
  options: { pauseMs?: number; maxScrolls?: number } = {}
): Promise<number> {
  const { pauseMs: delay = 1000, maxScrolls: limit = 10 } = options;

  // Everything inside the callback runs in the page, not in Node.
  return page.evaluate(
    ({ selector, waitMs, attempts }) => {
      const anchor = document.querySelector(selector);
      if (anchor === null) return -1;

      // Climb from the anchor's parent until a scrollable ancestor is found.
      let candidate = anchor.parentElement;
      for (
        ;
        candidate !== null && candidate !== document.body;
        candidate = candidate.parentElement
      ) {
        const overflowY = window.getComputedStyle(candidate).overflowY;
        const allowsScroll = overflowY === "auto" || overflowY === "scroll";
        if (allowsScroll && candidate.scrollHeight > candidate.clientHeight) break;
      }

      if (candidate === null || candidate === document.body) return -1;
      const scroller = candidate;

      // Jump to the bottom, wait, and continue only while the content grew.
      return new Promise<number>((resolve) => {
        let performed = 0;
        const attempt = (): void => {
          if (performed >= attempts) {
            resolve(performed);
            return;
          }
          const heightBefore = scroller.scrollHeight;
          scroller.scrollTop = scroller.scrollHeight;
          setTimeout(() => {
            if (scroller.scrollHeight !== heightBefore) {
              performed += 1;
              attempt();
            } else {
              resolve(performed);
            }
          }, waitMs);
        };
        attempt();
      });
    },
    { selector: anchorSelector, waitMs: delay, attempts: limit }
  );
}
0 commit comments