22from __future__ import annotations
33
44import os
5+ import re
56import typing
67from typing import Any
78from urllib .parse import urlparse
# String-pair replacements applied to URIs and bodies by _apply_replacements().
# Empty list means "nothing configured"; callers skip replacement in that case.
# NOTE(review): presumably filled by configure_normalization() -- confirm there.
_normalization_replacements: list[tuple[str, str]] = []
# Flag for whether normalization has been configured; no reader is visible in
# this part of the file.
_normalization_configured: bool = False

# --- Dynamic hash normalization ---
# executionResult: 40-hex ":" 64-hex (body uses ":", URI uses "%3A")
_EXEC_HASH_BODY_RE = re.compile(r"[0-9a-f]{40}:[0-9a-f]{64}")
_EXEC_HASH_URI_RE = re.compile(r"[0-9a-f]{40}%3[Aa][0-9a-f]{64}")

# exportResult: 40-hex, anchored to /export/tabular/ in URIs
# or preceded by "exportResult" context in bodies.
# The trailing (?![0-9a-f]) rejects longer hex runs that merely start with
# 40 hex digits.
_EXPORT_HASH_URI_RE = re.compile(r"(?<=/export/tabular/)[0-9a-f]{40}(?![0-9a-f])")
# Python's `re` only supports fixed-width lookbehind, so every separator
# style needs its own alternative. group(0) is always just the 40-hex hash.
_EXPORT_HASH_BODY_RE = re.compile(
    r'(?<=exportResult": ")[0-9a-f]{40}(?![0-9a-f])'  # JSON: "exportResult": "hash"
    r"|"
    r'(?<=exportResult":")[0-9a-f]{40}(?![0-9a-f])'  # compact JSON: "exportResult":"hash"
    r"|"
    r"(?<=exportResult: )[0-9a-f]{40}(?![0-9a-f])"  # YAML: exportResult: hash
)
59+
60+
class _HashNormalizer:
    """Map server-computed hex hashes to deterministic placeholders.

    Each distinct hash is assigned the next free placeholder of its kind
    (``EXECUTION_RESULT_0``, ``EXECUTION_RESULT_1``, ... and
    ``EXPORT_RESULT_0``, ...), in order of first appearance. Asking for the
    same hash again returns the placeholder it was given the first time, for
    as long as this instance lives.
    """

    def __init__(self) -> None:
        # hash -> placeholder, one table per hash kind
        self._exec_map: dict[str, str] = {}
        self._export_map: dict[str, str] = {}
        # next index to hand out for each kind
        self._exec_counter: int = 0
        self._export_counter: int = 0

    def normalize_exec(self, full_hash: str) -> str:
        """Return the ``EXECUTION_RESULT_<n>`` placeholder for *full_hash*.

        The caller passes the complete matched hash text; the same input
        always yields the same placeholder.
        """
        placeholder = self._exec_map.get(full_hash)
        if placeholder is None:
            # No padding inside or after the braces: the placeholder is
            # substituted verbatim into URIs and bodies.
            placeholder = f"EXECUTION_RESULT_{self._exec_counter}"
            self._exec_map[full_hash] = placeholder
            self._exec_counter += 1
        return placeholder

    def normalize_export(self, hex_hash: str) -> str:
        """Return the ``EXPORT_RESULT_<n>`` placeholder for *hex_hash*."""
        placeholder = self._export_map.get(hex_hash)
        if placeholder is None:
            placeholder = f"EXPORT_RESULT_{self._export_counter}"
            self._export_map[hex_hash] = placeholder
            self._export_counter += 1
        return placeholder


# Module-wide instance shared by the URI and body normalization helpers, so a
# hash seen in a response body and later in a request URI maps to one placeholder.
_hash_normalizer = _HashNormalizer()
89+
4590
4691def configure_normalization (test_config : dict [str , Any ]) -> None :
4792 """Build normalization replacements from the active test environment config.
@@ -117,6 +162,26 @@ def _apply_replacements(text: str) -> str:
117162 return text
118163
119164
def _normalize_hashes_in_text(text: str) -> str:
    """Swap executionResult/exportResult hashes in *text* for stable placeholders.

    Execution-result hashes are rewritten first, then export-result hashes
    are rewritten in the output of that first pass.
    """

    def _exec_placeholder(match: re.Match[str]) -> str:
        return _hash_normalizer.normalize_exec(match.group(0))

    def _export_placeholder(match: re.Match[str]) -> str:
        return _hash_normalizer.normalize_export(match.group(0))

    without_exec_hashes = _EXEC_HASH_BODY_RE.sub(_exec_placeholder, text)
    return _EXPORT_HASH_BODY_RE.sub(_export_placeholder, without_exec_hashes)
170+
171+
def _normalize_hashes_in_uri(uri: str) -> str:
    """Swap executionResult/exportResult hashes in a request URI for placeholders."""

    def _exec_placeholder(match: re.Match) -> str:
        # Decode the URL-encoded colon (either letter case) so the lookup key
        # has the same "<hex>:<hex>" form that bodies use.
        decoded = match.group(0)
        for encoded_colon in ("%3A", "%3a"):
            decoded = decoded.replace(encoded_colon, ":")
        return _hash_normalizer.normalize_exec(decoded)

    def _export_placeholder(match: re.Match) -> str:
        return _hash_normalizer.normalize_export(match.group(0))

    with_exec_placeholders = _EXEC_HASH_URI_RE.sub(_exec_placeholder, uri)
    return _EXPORT_HASH_URI_RE.sub(_export_placeholder, with_exec_placeholders)
183+
184+
120185def _normalize_body (body : str | None ) -> str :
121186 """Strip environment-specific fields from a JSON request body for matching."""
122187 if not body :
@@ -217,10 +282,9 @@ def serialize(self, cassette_dict: dict[str, Any]) -> str:
217282
218283
def _normalize_uri(uri: str) -> str:
    """Rewrite a request URI to use the canonical host and normalize dynamic hashes.

    Configured replacements are applied only when any exist. Hash
    normalization always runs, including when no replacements are configured,
    so there must be no early return before it.
    """
    if _normalization_replacements:
        uri = _apply_replacements(uri)
    return _normalize_hashes_in_uri(uri)
224288
225289
226290def custom_before_request (request , headers_str : str = HEADERS_STR ):
@@ -233,12 +297,18 @@ def custom_before_request(request, headers_str: str = HEADERS_STR):
233297 # Normalize URI to canonical host
234298 request .uri = _normalize_uri (request .uri )
235299
236- # Normalize environment-specific values in request body
237- if _normalization_replacements and request .body :
300+ # Normalize environment-specific values and dynamic hashes in request body
301+ if request .body :
238302 if isinstance (request .body , bytes ):
239- request .body = _apply_replacements (request .body .decode ("utf-8" , errors = "replace" )).encode ("utf-8" )
303+ decoded = request .body .decode ("utf-8" , errors = "replace" )
304+ if _normalization_replacements :
305+ decoded = _apply_replacements (decoded )
306+ decoded = _normalize_hashes_in_text (decoded )
307+ request .body = decoded .encode ("utf-8" )
240308 elif isinstance (request .body , str ):
241- request .body = _apply_replacements (request .body )
309+ if _normalization_replacements :
310+ request .body = _apply_replacements (request .body )
311+ request .body = _normalize_hashes_in_text (request .body )
242312
243313 if hasattr (request , headers_str ):
244314 request .headers = {header : request .headers [header ] for header in sorted (request .headers )}
@@ -277,16 +347,21 @@ def custom_before_response(
277347 unified_headers [header ] = value
278348 response [headers_str ] = unified_headers
279349
280- # Normalize environment-specific values in response body
281- if _normalization_replacements :
282- body = response .get ("body" )
283- if body is not None :
284- body_string = body .get ("string" )
285- if body_string :
286- if isinstance (body_string , bytes ):
287- body ["string" ] = _apply_replacements (body_string .decode ("utf-8" , errors = "replace" )).encode ("utf-8" )
288- elif isinstance (body_string , str ):
289- body ["string" ] = _apply_replacements (body_string )
350+ # Normalize response body: environment-specific values and dynamic hashes
351+ body = response .get ("body" )
352+ if body is not None :
353+ body_string = body .get ("string" )
354+ if body_string :
355+ if isinstance (body_string , bytes ):
356+ decoded = body_string .decode ("utf-8" , errors = "replace" )
357+ if _normalization_replacements :
358+ decoded = _apply_replacements (decoded )
359+ decoded = _normalize_hashes_in_text (decoded )
360+ body ["string" ] = decoded .encode ("utf-8" )
361+ elif isinstance (body_string , str ):
362+ if _normalization_replacements :
363+ body_string = _apply_replacements (body_string )
364+ body ["string" ] = _normalize_hashes_in_text (body_string )
290365
291366 return response
292367
0 commit comments