|
| 1 | +import pytest |
| 2 | + |
| 3 | +from openwpm.browser_manager import _is_dns_error |
1 | 4 | from openwpm.commands.utils.webdriver_utils import parse_neterror |
2 | 5 | from openwpm.utilities import db_utils |
3 | 6 |
|
@@ -26,3 +29,75 @@ def test_parse_neterror_integration(default_params, task_manager_creator): |
26 | 29 |
|
27 | 30 | assert get_command[0] == "neterror" |
28 | 31 | assert get_command[1] == "dnsNotFound" |
| 32 | + |
| 33 | + |
@pytest.mark.slow
def test_dns_error_does_not_count_against_failure_limit(
    default_params, task_manager_creator
):
    """AC-4: a long run of DNS failures must not exhaust ``failure_limit``.

    A single browser is configured with ``failure_limit = 5`` and then sent
    to 110 distinct hosts under the ``.invalid`` TLD. The test expects every
    visit to be recorded in ``crawl_history`` as a ``neterror`` with the
    ``dnsNotFound`` code, i.e. the crawl ran to completion instead of being
    aborted with a CommandExecutionError once the limit was passed.
    """
    manager_params, browser_params = default_params
    manager_params.num_browsers = 1
    # Deliberately far below the number of failing visits issued below.
    manager_params.failure_limit = 5
    manager, db = task_manager_creator((manager_params, browser_params[:1]))

    num_domains = 110
    unresolvable_urls = [f"http://domain-{i}.invalid" for i in range(num_domains)]
    for url in unresolvable_urls:
        manager.get(url)
    manager.close()

    history = db_utils.query_db(
        db,
        "SELECT command_status, error FROM crawl_history WHERE command='GetCommand'",
        as_tuple=True,
    )
    # Count only the rows that were classified as DNS lookup failures.
    dns_failure_count = sum(
        1
        for status, error in history
        if status == "neterror" and error == "dnsNotFound"
    )
    assert dns_failure_count == num_domains
| 59 | + |
| 60 | + |
def test_is_dns_error_predicate():
    """AC-5: ``_is_dns_error`` holds only for ``dnsNotFound`` neterrors.

    Every other neterror code (connectionRefused, netOffline, ...), a missing
    error text, and every non-neterror command status must be rejected so
    that those outcomes keep counting towards ``failure_count``.
    """
    # (command_status, error_text, expected result of the predicate)
    predicate_cases = [
        # Only this exact combination qualifies.
        ("neterror", "dnsNotFound", True),
        # Other neterror types must NOT be exempt.
        ("neterror", "connectionRefused", False),
        ("neterror", "netOffline", False),
        ("neterror", "proxyConnectFailure", False),
        # Missing error_text must NOT be exempt.
        ("neterror", None, False),
        # Non-neterror statuses must NOT be exempt.
        ("ok", "dnsNotFound", False),
        ("critical", "dnsNotFound", False),
        ("error", "dnsNotFound", False),
        ("timeout", "dnsNotFound", False),
    ]
    for command_status, error_text, expected in predicate_cases:
        assert _is_dns_error(command_status, error_text) is expected

    # Both sample exception messages share the same WebDriver preamble.
    error_page_prefix = (
        "selenium.common.exceptions.WebDriverException: "
        "Message: Reached error page: "
    )
    dns_msg = (
        error_page_prefix
        + "about:neterror?e=dnsNotFound&u=http%3A//missing.example/&c=UTF-8&"
        + "f=regular&d=We+can%27t+connect"
    )
    conn_msg = (
        error_page_prefix
        + "about:neterror?e=connectionRefused&u=http%3A//localhost%3A1/&c=UTF-8&"
        + "f=regular&d=refused."
    )

    # (raw message, code parse_neterror must extract, whether it is exempt)
    parsed_cases = [
        (dns_msg, "dnsNotFound", True),
        (conn_msg, "connectionRefused", False),
    ]
    for message, expected_code, exempt in parsed_cases:
        assert parse_neterror(message) == expected_code
        assert _is_dns_error("neterror", parse_neterror(message)) is exempt
0 commit comments