diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index 89fb782d9..0c7d0d75b 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -2920,11 +2920,15 @@ def compute_head_fingerprint(head_html: str) -> str:
if not head_html:
return ""
- head_lower = head_html.lower()
signals = []
+ # Match tags/attributes case-insensitively, but extract values from the
+ # ORIGINAL head: lowercasing the whole head first would fold a case-only
+ # title/meta change (e.g. "iPhone" -> "IPHONE") to the same fingerprint, so
+ # the cache validator would treat a genuinely changed page as unchanged.
+
# Extract title
- title_match = re.search(r'
]*>(.*?)', head_lower, re.DOTALL)
+ title_match = re.search(r']*>(.*?)', head_html, re.DOTALL | re.IGNORECASE)
if title_match:
signals.append(title_match.group(1).strip())
@@ -2946,7 +2950,7 @@ def compute_head_fingerprint(head_html: str) -> str:
rf']*content=["\']([^"\']*)["\'][^>]*{attr_type}=["\']{re.escape(attr_value)}["\']',
]
for pattern in patterns:
- match = re.search(pattern, head_lower)
+ match = re.search(pattern, head_html, re.IGNORECASE)
if match:
signals.append(match.group(1).strip())
break # Found this tag, move to next
diff --git a/tests/cache_validation/test_head_fingerprint.py b/tests/cache_validation/test_head_fingerprint.py
index 287f255d8..3d10a162e 100644
--- a/tests/cache_validation/test_head_fingerprint.py
+++ b/tests/cache_validation/test_head_fingerprint.py
@@ -95,3 +95,26 @@ def test_real_world_head(self):
assert fp != ""
# Should be deterministic
assert fp == compute_head_fingerprint(head)
+
+ def test_value_case_change_changes_fingerprint(self):
+ """A case-only change in a title/meta *value* must change the
+ fingerprint; otherwise the cache validator treats a genuinely updated
+ page as unchanged and serves stale content. Regression test."""
+ assert compute_head_fingerprint(
+ "iPhone"
+ ) != compute_head_fingerprint("IPHONE")
+ assert compute_head_fingerprint(
+ ''
+ ) != compute_head_fingerprint(
+ ''
+ )
+
+ def test_tag_and_attribute_case_does_not_change_fingerprint(self):
+ """Tag/attribute case is still matched case-insensitively; only the
+ markup case (not the values) differs, so the fingerprint is the same."""
+ assert compute_head_fingerprint(
+ "Hello"
+ ) == compute_head_fingerprint("Hello")
+ assert compute_head_fingerprint(
+ ''
+ ) == compute_head_fingerprint('')