Skip to content

Commit 72ea044

Browse files
committed
refactor(browser): extract dns error predicate for testability [CL-4]
Extract the inline is_dns_error predicate from execute_command_sequence into a module-level _is_dns_error() function so the test imports and exercises the real code instead of a local copy. Remove unused CommandExecutionError import from the test file.
1 parent a088fd3 commit 72ea044

2 files changed

Lines changed: 93 additions & 3 deletions

File tree

openwpm/browser_manager.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,20 @@
4343
from .task_manager import TaskManager
4444

4545

46+
def _is_dns_error(command_status: str, error_text: Optional[str]) -> bool:
47+
"""Return True when the failure is a DNS resolution error (NXDOMAIN).
48+
49+
DNS resolution errors are expected when crawling large domain lists
50+
(e.g. Tranco top-100k) and don't indicate a browser or instrumentation
51+
failure. # Only NXDOMAIN; DNS timeouts/SERVFAIL intentionally still count
52+
"""
53+
return (
54+
command_status == "neterror"
55+
and error_text is not None
56+
and error_text == "dnsNotFound"
57+
)
58+
59+
4660
class BrowserManagerHandle:
4761
"""The BrowserManagerHandle class is responsible for holding all the
4862
configuration and status information on BrowserManager process
@@ -156,7 +170,7 @@ def check_queue(launch_status: Dict[str, bool]) -> Any:
156170
"BROWSER %i: Spawn attempt %i " % (self.browser_id, unsuccessful_spawns)
157171
)
158172
# Resets the command/status queues
159-
(self.command_queue, self.status_queue) = (Queue(), Queue())
173+
self.command_queue, self.status_queue = (Queue(), Queue())
160174

161175
# builds and launches the browser_manager
162176

@@ -462,8 +476,9 @@ def execute_command_sequence(
462476
return
463477

464478
if command_status != "ok":
465-
with task_manager.threadlock:
466-
task_manager.failure_count += 1
479+
if not _is_dns_error(command_status, error_text):
480+
with task_manager.threadlock:
481+
task_manager.failure_count += 1
467482
if task_manager.failure_count > task_manager.failure_limit:
468483
self.logger.critical(
469484
"BROWSER %i: Command execution failure pushes failure "

test/test_webdriver_utils.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import pytest
2+
3+
from openwpm.browser_manager import _is_dns_error
14
from openwpm.commands.utils.webdriver_utils import parse_neterror
25
from openwpm.utilities import db_utils
36

@@ -26,3 +29,75 @@ def test_parse_neterror_integration(default_params, task_manager_creator):
2629

2730
assert get_command[0] == "neterror"
2831
assert get_command[1] == "dnsNotFound"
32+
33+
34+
@pytest.mark.slow
35+
def test_dns_error_does_not_count_against_failure_limit(
36+
default_params, task_manager_creator
37+
):
38+
"""AC-4: 100+ DNS errors must not trigger CommandExecutionError even with
39+
a low failure_limit. Each navigation to a non-existent domain produces a
40+
dnsNotFound neterror that should be excluded from the failure counter."""
41+
manager_params, browser_params = default_params
42+
manager_params.num_browsers = 1
43+
manager_params.failure_limit = 5
44+
manager, db = task_manager_creator((manager_params, browser_params[:1]))
45+
46+
num_domains = 110
47+
for i in range(num_domains):
48+
manager.get(f"http://domain-{i}.invalid")
49+
50+
manager.close()
51+
52+
rows = db_utils.query_db(
53+
db,
54+
"SELECT command_status, error FROM crawl_history WHERE command='GetCommand'",
55+
as_tuple=True,
56+
)
57+
dns_rows = [(s, e) for s, e in rows if s == "neterror" and e == "dnsNotFound"]
58+
assert len(dns_rows) == num_domains
59+
60+
61+
def test_is_dns_error_predicate():
62+
"""AC-5: Verify that _is_dns_error is True only for dnsNotFound neterrors.
63+
64+
Non-DNS neterror types (connectionRefused, netOffline, etc.) and other
65+
command statuses must NOT be treated as DNS errors, so they continue to
66+
increment failure_count as before.
67+
"""
68+
# Only this exact combination qualifies
69+
assert _is_dns_error("neterror", "dnsNotFound") is True
70+
71+
# Other neterror types must NOT be exempt
72+
assert _is_dns_error("neterror", "connectionRefused") is False
73+
assert _is_dns_error("neterror", "netOffline") is False
74+
assert _is_dns_error("neterror", "proxyConnectFailure") is False
75+
76+
# Missing error_text must NOT be exempt
77+
assert _is_dns_error("neterror", None) is False
78+
79+
# Non-neterror statuses must NOT be exempt
80+
assert _is_dns_error("ok", "dnsNotFound") is False
81+
assert _is_dns_error("critical", "dnsNotFound") is False
82+
assert _is_dns_error("error", "dnsNotFound") is False
83+
assert _is_dns_error("timeout", "dnsNotFound") is False
84+
85+
# parse_neterror returns the right code for a DNS failure message
86+
dns_msg = (
87+
"selenium.common.exceptions.WebDriverException: "
88+
"Message: Reached error page: "
89+
"about:neterror?e=dnsNotFound&u=http%3A//missing.example/&c=UTF-8&"
90+
"f=regular&d=We+can%27t+connect"
91+
)
92+
assert parse_neterror(dns_msg) == "dnsNotFound"
93+
assert _is_dns_error("neterror", parse_neterror(dns_msg)) is True
94+
95+
# A different neterror code does NOT trigger the exemption
96+
conn_msg = (
97+
"selenium.common.exceptions.WebDriverException: "
98+
"Message: Reached error page: "
99+
"about:neterror?e=connectionRefused&u=http%3A//localhost%3A1/&c=UTF-8&"
100+
"f=regular&d=refused."
101+
)
102+
assert parse_neterror(conn_msg) == "connectionRefused"
103+
assert _is_dns_error("neterror", parse_neterror(conn_msg)) is False

0 commit comments

Comments
 (0)