Skip to content

Commit 1d53a2a

Browse files
committed
feat(tests): normalize dynamic result hashes in VCR cassettes
Add stateful hash normalization to vcrpy_utils so that server-computed executionResult (40-hex:64-hex) and exportResult (40-hex) hashes are replaced with deterministic placeholders (EXECUTION_RESULT_N, EXPORT_RESULT_N) during cassette recording. This eliminates noisy diffs when re-recording cassettes from different environments. Normalization is applied to request URIs, request bodies, and response bodies via the existing before_record_* callbacks. risk: low
1 parent 3cc5480 commit 1d53a2a

3 files changed

Lines changed: 97 additions & 18 deletions

File tree

packages/gooddata-fdw/tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66
import yaml
77
from gooddata_fdw.environment import ColumnDefinition
8+
from tests_support.vcrpy_utils import configure_normalization
89

910

1011
def pytest_addoption(parser):
@@ -23,6 +24,7 @@ def test_config(request):
2324
with open(config_path) as f:
2425
config = yaml.safe_load(f)
2526

27+
configure_normalization(config)
2628
return config
2729

2830

packages/gooddata-pandas/tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pytest
55
import yaml
6+
from tests_support.vcrpy_utils import configure_normalization
67

78

89
def pytest_addoption(parser):
@@ -21,4 +22,5 @@ def test_config(request):
2122
with open(config_path) as f:
2223
config = yaml.safe_load(f)
2324

25+
configure_normalization(config)
2426
return config

packages/tests-support/src/tests_support/vcrpy_utils.py

Lines changed: 93 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import annotations
33

44
import os
5+
import re
56
import typing
67
from typing import Any
78
from urllib.parse import urlparse
@@ -42,6 +43,50 @@
4243
_normalization_replacements: list[tuple[str, str]] = []
4344
_normalization_configured: bool = False
4445

46+
# --- Dynamic hash normalization ---
# executionResult: 40-hex ":" 64-hex (body uses ":", URI uses "%3A")
_EXEC_HASH_BODY_RE = re.compile(r"[0-9a-f]{40}:[0-9a-f]{64}")
_EXEC_HASH_URI_RE = re.compile(r"[0-9a-f]{40}%3[Aa][0-9a-f]{64}")

# exportResult: 40-hex, anchored to /export/tabular/ in URIs
# or preceded by "exportResult" context in bodies.
# Python look-behinds must be fixed-width, so every spelling of the
# key/value separator needs its own alternative. Keeping the context in a
# look-behind (rather than a group) means group(0) is always just the hash.
_EXPORT_HASH_URI_RE = re.compile(r"(?<=/export/tabular/)[0-9a-f]{40}(?![0-9a-f])")
_EXPORT_HASH_BODY_RE = re.compile(
    r'(?<=exportResult": ")[0-9a-f]{40}(?![0-9a-f])'  # JSON: "exportResult": "hash"
    r"|"
    r'(?<=exportResult":")[0-9a-f]{40}(?![0-9a-f])'  # compact JSON: "exportResult":"hash"
    r"|"
    r"(?<=exportResult: )[0-9a-f]{40}(?![0-9a-f])"  # YAML: exportResult: hash
)
59+
60+
61+
class _HashNormalizer:
    """Map server-computed hex hashes to deterministic placeholders.

    Each distinct hash is assigned a placeholder of the form
    ``EXECUTION_RESULT_<n>`` or ``EXPORT_RESULT_<n>``, where ``<n>`` is the
    zero-based order in which that hash was first seen by this instance.
    The same hash always maps to the same placeholder until ``reset()`` is
    called. Execution and export hashes are numbered independently.
    """

    def __init__(self) -> None:
        self._exec_map: dict[str, str] = {}
        self._export_map: dict[str, str] = {}

    @staticmethod
    def _assign(mapping: dict[str, str], prefix: str, key: str) -> str:
        """Return the placeholder for *key*, allocating the next index on first sight."""
        placeholder = mapping.get(key)
        if placeholder is None:
            # Entries are only ever added here, so the current size of the
            # mapping is the next free index; no separate counter is needed.
            placeholder = f"{prefix}_{len(mapping)}"
            mapping[key] = placeholder
        return placeholder

    def normalize_exec(self, full_hash: str) -> str:
        """Return the ``EXECUTION_RESULT_<n>`` placeholder for *full_hash*."""
        return self._assign(self._exec_map, "EXECUTION_RESULT", full_hash)

    def normalize_export(self, hex_hash: str) -> str:
        """Return the ``EXPORT_RESULT_<n>`` placeholder for *hex_hash*."""
        return self._assign(self._export_map, "EXPORT_RESULT", hex_hash)

    def reset(self) -> None:
        """Forget every mapping so that numbering restarts at 0 for both kinds."""
        self._exec_map.clear()
        self._export_map.clear()


_hash_normalizer = _HashNormalizer()
89+
4590

4691
def configure_normalization(test_config: dict[str, Any]) -> None:
4792
"""Build normalization replacements from the active test environment config.
@@ -117,6 +162,26 @@ def _apply_replacements(text: str) -> str:
117162
return text
118163

119164

165+
def _normalize_hashes_in_text(text: str) -> str:
    """Swap executionResult/exportResult hashes in *text* for their placeholders.

    executionResult hashes are substituted first, then exportResult hashes;
    every match is looked up through the shared ``_hash_normalizer``.
    """

    def _exec_placeholder(match: re.Match) -> str:
        return _hash_normalizer.normalize_exec(match.group(0))

    def _export_placeholder(match: re.Match) -> str:
        return _hash_normalizer.normalize_export(match.group(0))

    without_exec_hashes = _EXEC_HASH_BODY_RE.sub(_exec_placeholder, text)
    return _EXPORT_HASH_BODY_RE.sub(_export_placeholder, without_exec_hashes)
170+
171+
172+
def _normalize_hashes_in_uri(uri: str) -> str:
    """Replace executionResult/exportResult hashes in a request URI.

    An executionResult id is recognised both with a percent-encoded
    separator (``%3A``/``%3a``) and with a literal ``:``. Both spellings are
    looked up under the plain-colon form, so they receive the same
    placeholder as the identical hash appearing in a body.

    Args:
        uri: The request URI to normalize.

    Returns:
        The URI with every recognised hash replaced by its placeholder.
    """

    def _replace_exec_uri(m: re.Match) -> str:
        # Convert URL-encoded %3A to plain colon for consistent mapping
        plain = m.group(0).replace("%3A", ":").replace("%3a", ":")
        return _hash_normalizer.normalize_exec(plain)

    uri = _EXEC_HASH_URI_RE.sub(_replace_exec_uri, uri)
    # ":" is a legal character inside a path segment, so a client may send it
    # unencoded. Without this pass such a URI would keep its raw hash while
    # bodies carry the placeholder.
    uri = _EXEC_HASH_BODY_RE.sub(lambda m: _hash_normalizer.normalize_exec(m.group(0)), uri)
    uri = _EXPORT_HASH_URI_RE.sub(lambda m: _hash_normalizer.normalize_export(m.group(0)), uri)
    return uri
183+
184+
120185
def _normalize_body(body: str | None) -> str:
121186
"""Strip environment-specific fields from a JSON request body for matching."""
122187
if not body:
@@ -217,10 +282,9 @@ def serialize(self, cassette_dict: dict[str, Any]) -> str:
217282

218283

219284
def _normalize_uri(uri: str) -> str:
    """Rewrite a request URI to use the canonical host and normalize dynamic hashes.

    Configured replacements are applied only when there are any; hash
    normalization runs in either case.
    """
    if _normalization_replacements:
        uri = _apply_replacements(uri)
    return _normalize_hashes_in_uri(uri)
224288

225289

226290
def custom_before_request(request, headers_str: str = HEADERS_STR):
@@ -233,12 +297,18 @@ def custom_before_request(request, headers_str: str = HEADERS_STR):
233297
# Normalize URI to canonical host
234298
request.uri = _normalize_uri(request.uri)
235299

236-
# Normalize environment-specific values in request body
237-
if _normalization_replacements and request.body:
300+
# Normalize environment-specific values and dynamic hashes in request body
301+
if request.body:
238302
if isinstance(request.body, bytes):
239-
request.body = _apply_replacements(request.body.decode("utf-8", errors="replace")).encode("utf-8")
303+
decoded = request.body.decode("utf-8", errors="replace")
304+
if _normalization_replacements:
305+
decoded = _apply_replacements(decoded)
306+
decoded = _normalize_hashes_in_text(decoded)
307+
request.body = decoded.encode("utf-8")
240308
elif isinstance(request.body, str):
241-
request.body = _apply_replacements(request.body)
309+
if _normalization_replacements:
310+
request.body = _apply_replacements(request.body)
311+
request.body = _normalize_hashes_in_text(request.body)
242312

243313
if hasattr(request, headers_str):
244314
request.headers = {header: request.headers[header] for header in sorted(request.headers)}
@@ -277,16 +347,21 @@ def custom_before_response(
277347
unified_headers[header] = value
278348
response[headers_str] = unified_headers
279349

280-
# Normalize environment-specific values in response body
281-
if _normalization_replacements:
282-
body = response.get("body")
283-
if body is not None:
284-
body_string = body.get("string")
285-
if body_string:
286-
if isinstance(body_string, bytes):
287-
body["string"] = _apply_replacements(body_string.decode("utf-8", errors="replace")).encode("utf-8")
288-
elif isinstance(body_string, str):
289-
body["string"] = _apply_replacements(body_string)
350+
# Normalize response body: environment-specific values and dynamic hashes
351+
body = response.get("body")
352+
if body is not None:
353+
body_string = body.get("string")
354+
if body_string:
355+
if isinstance(body_string, bytes):
356+
decoded = body_string.decode("utf-8", errors="replace")
357+
if _normalization_replacements:
358+
decoded = _apply_replacements(decoded)
359+
decoded = _normalize_hashes_in_text(decoded)
360+
body["string"] = decoded.encode("utf-8")
361+
elif isinstance(body_string, str):
362+
if _normalization_replacements:
363+
body_string = _apply_replacements(body_string)
364+
body["string"] = _normalize_hashes_in_text(body_string)
290365

291366
return response
292367

0 commit comments

Comments
 (0)