Skip to content

Commit 61c3a58

Browse files
authored
Merge pull request #14 from PredicateSystems/test_can
test canonicalization
2 parents b4c7d96 + 8ee2d19 commit 61c3a58

2 files changed

Lines changed: 348 additions & 0 deletions

File tree

docs/predicate-authority-user-manual.md

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -610,8 +610,122 @@ if not decision.allowed:
610610

611611
---
612612

613+
## Canonicalization for reproducible state hashes
614+
615+
`predicate_contracts` includes canonicalization functions for computing reproducible
616+
`state_hash` values. This ensures that semantically equivalent inputs produce
617+
identical hashes, even with superficial differences like whitespace or ANSI codes.
618+
619+
### Terminal canonicalization
620+
621+
```python
622+
from predicate_contracts import (
623+
canonicalize_terminal_snapshot,
624+
compute_terminal_state_hash,
625+
TERMINAL_SCHEMA_VERSION,
626+
)
627+
628+
# Raw terminal state with ANSI codes and extra whitespace
629+
raw_snapshot = {
630+
"session_id": "sess-123",
631+
"command": " npm test ",
632+
"transcript": "\x1b[32mPASS\x1b[0m All tests passed at 10:30:45",
633+
"cwd": "/home/user/./project/../project",
634+
}
635+
636+
# Compute canonical hash
637+
state_hash = compute_terminal_state_hash(raw_snapshot)
638+
# Returns: "sha256:" followed by a 64-character hex digest
639+
640+
# View normalized snapshot
641+
canonical = canonicalize_terminal_snapshot(raw_snapshot)
642+
print(canonical.command_normalized) # "npm test"
643+
print(canonical.transcript_normalized) # "PASS All tests passed at [TIMESTAMP]"
644+
print(canonical.cwd_normalized) # "/home/user/project"
645+
```
646+
647+
**What gets normalized:**
648+
649+
| Field | Normalization | Example |
650+
|-------|---------------|---------|
651+
| `command` | Trim, collapse whitespace | `" ls -la "` → `"ls -la"` |
652+
| `transcript` | Strip ANSI, normalize timestamps, collapse whitespace | `"\x1b[32mOK\x1b[0m 10:30:45"` → `"OK [TIMESTAMP]"` |
653+
| `cwd` | Resolve `.` and `..` | `"/foo/./bar/../baz"` → `"/foo/baz"` |
654+
| `env` | Sort keys, redact secrets | `AWS_SECRET_KEY` → `[REDACTED]` |
655+
656+
### Desktop accessibility canonicalization
657+
658+
```python
659+
from predicate_contracts import (
660+
canonicalize_desktop_snapshot,
661+
compute_desktop_state_hash,
662+
canonicalize_accessibility_node,
663+
DESKTOP_SCHEMA_VERSION,
664+
)
665+
666+
# Raw desktop state with varying case/whitespace
667+
raw_snapshot = {
668+
"app_name": " FIREFOX ",
669+
"window_title": " GitHub - Pull Requests ",
670+
"focused_role": "BUTTON",
671+
"focused_name": " Merge ",
672+
}
673+
674+
# Compute canonical hash
675+
state_hash = compute_desktop_state_hash(raw_snapshot)
676+
677+
# View normalized snapshot
678+
canonical = canonicalize_desktop_snapshot(raw_snapshot)
679+
print(canonical.app_name_norm) # "firefox"
680+
print(canonical.window_title_norm) # "github - pull requests"
681+
print(canonical.focused_path) # "button[merge]"
682+
```
683+
684+
**UI tree determinism:**
685+
686+
```python
687+
# Children are sorted by (role, name) for deterministic hashing
688+
tree = {
689+
"role": "window",
690+
"children": [
691+
{"role": "button", "name": "Cancel"},
692+
{"role": "button", "name": "OK"},
693+
],
694+
}
695+
canonical = canonicalize_accessibility_node(tree)
696+
# Children sorted: Cancel comes before OK alphabetically (names are compared after normalization)
697+
```
698+
699+
### Schema versions
700+
701+
Use schema versions for forward compatibility:
702+
703+
- `TERMINAL_SCHEMA_VERSION = "terminal:v1.0"`
704+
- `DESKTOP_SCHEMA_VERSION = "desktop:v1.0"`
705+
706+
These are included in `StateEvidence.schema_version` to track the canonicalization format used to produce a given hash.
707+
708+
### Utility functions
709+
710+
```python
711+
from predicate_contracts import (
712+
normalize_text, # Lowercase, trim, collapse whitespace
713+
normalize_command, # Trim, collapse whitespace (preserves case)
714+
strip_ansi, # Remove ANSI escape codes
715+
normalize_timestamps, # Replace timestamps with [TIMESTAMP]
716+
normalize_transcript, # Full transcript normalization
717+
normalize_path, # Resolve . and .. in paths
718+
is_secret_key, # Check if env var should be redacted
719+
hash_environment, # Hash env vars with secret redaction
720+
sha256, # Compute SHA-256 hash
721+
)
722+
```
723+
724+
---
725+
613726
## Where to go next
614727

615728
- Operations guide: `docs/authorityd-operations.md`
616729
- Architecture proposal: `docs/predicate_authority_docs/better-sdk-opportunity-proposal.md`
617730
- Governance sign-off tracker: `docs/predicate_authority_docs/governance-signoff-tracker.md`
731+
- Canonicalization design: `docs/predicate_claw/NON_WEB_CANONICALIZATION_DESIGN.md`

tests/test_canonicalization.py

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,237 @@ def test_equivalent_inputs_produce_identical_hashes(self) -> None:
319319

320320
def test_exports_desktop_schema_version(self) -> None:
321321
assert DESKTOP_SCHEMA_VERSION == "desktop:v1.0"
322+
323+
324+
class TestPhase4Verification:
325+
"""Phase 4 verification tests for cross-platform, ANSI edge cases, and UI tree determinism."""
326+
327+
class TestCrossPlatformPathNormalization:
328+
"""Cross-platform path normalization tests."""
329+
330+
def test_normalizes_unix_paths_with_dot_components(self) -> None:
331+
result = normalize_path("/home/user/./project/../project/src")
332+
assert "/." not in result
333+
assert "/.." not in result
334+
assert "project" in result
335+
assert "src" in result
336+
337+
def test_handles_multiple_consecutive_slashes(self) -> None:
338+
result = normalize_path("/foo//bar///baz")
339+
assert "//" not in result
340+
341+
def test_preserves_absolute_paths(self) -> None:
342+
result = normalize_path("/absolute/path/to/file")
343+
assert result.startswith("/")
344+
345+
def test_handles_empty_path_components(self) -> None:
346+
result = normalize_path("/foo/./bar")
347+
assert result == "/foo/bar"
348+
349+
def test_handles_trailing_slashes_consistently(self) -> None:
350+
with_slash = normalize_path("/foo/bar/")
351+
without_slash = normalize_path("/foo/bar")
352+
assert with_slash.rstrip("/") == without_slash.rstrip("/")
353+
354+
def test_handles_root_path(self) -> None:
355+
result = normalize_path("/")
356+
assert result == "/"
357+
358+
class TestAnsiStrippingEdgeCases:
359+
"""ANSI stripping edge case tests."""
360+
361+
def test_strips_256_color_codes(self) -> None:
362+
assert strip_ansi("\x1b[38;5;196mRed256\x1b[0m") == "Red256"
363+
assert strip_ansi("\x1b[48;5;21mBlueBg\x1b[0m") == "BlueBg"
364+
365+
def test_strips_24bit_true_color_codes(self) -> None:
366+
assert strip_ansi("\x1b[38;2;255;100;50mOrange\x1b[0m") == "Orange"
367+
368+
def test_strips_bold_italic_underline_codes(self) -> None:
369+
assert strip_ansi("\x1b[1mBold\x1b[0m") == "Bold"
370+
assert strip_ansi("\x1b[3mItalic\x1b[0m") == "Italic"
371+
assert strip_ansi("\x1b[4mUnderline\x1b[0m") == "Underline"
372+
373+
def test_strips_cursor_movement_codes(self) -> None:
374+
assert strip_ansi("\x1b[5ACursor Up") == "Cursor Up"
375+
assert strip_ansi("\x1b[3BCursor Down") == "Cursor Down"
376+
assert strip_ansi("\x1b[2CCursor Forward") == "Cursor Forward"
377+
assert strip_ansi("\x1b[1DCursor Back") == "Cursor Back"
378+
379+
def test_strips_erase_codes(self) -> None:
380+
assert strip_ansi("\x1b[2JClear Screen") == "Clear Screen"
381+
assert strip_ansi("\x1b[KClear Line") == "Clear Line"
382+
383+
def test_strips_scroll_codes(self) -> None:
384+
assert strip_ansi("\x1b[3SScroll Up") == "Scroll Up"
385+
assert strip_ansi("\x1b[2TScroll Down") == "Scroll Down"
386+
387+
def test_handles_multiple_ansi_codes_in_sequence(self) -> None:
388+
complex_text = "\x1b[1m\x1b[31m\x1b[4mBold Red Underline\x1b[0m"
389+
assert strip_ansi(complex_text) == "Bold Red Underline"
390+
391+
def test_handles_ansi_codes_at_start_middle_and_end(self) -> None:
392+
text = "\x1b[32mStart\x1b[0m Middle \x1b[33mEnd\x1b[0m"
393+
assert strip_ansi(text) == "Start Middle End"
394+
395+
def test_preserves_text_without_ansi_codes(self) -> None:
396+
plain = "No escape codes here: [not ansi] {also not}"
397+
assert strip_ansi(plain) == plain
398+
399+
class TestUITreeDeterminism:
400+
"""UI tree determinism tests."""
401+
402+
def test_produces_same_hash_regardless_of_child_order(self) -> None:
403+
tree1 = {
404+
"role": "window",
405+
"name": "Main",
406+
"children": [
407+
{"role": "button", "name": "Save", "children": []},
408+
{"role": "button", "name": "Cancel", "children": []},
409+
{"role": "textbox", "name": "Input", "children": []},
410+
],
411+
}
412+
tree2 = {
413+
"role": "window",
414+
"name": "Main",
415+
"children": [
416+
{"role": "textbox", "name": "Input", "children": []},
417+
{"role": "button", "name": "Cancel", "children": []},
418+
{"role": "button", "name": "Save", "children": []},
419+
],
420+
}
421+
422+
canonical1 = canonicalize_accessibility_node(tree1)
423+
canonical2 = canonicalize_accessibility_node(tree2)
424+
425+
assert canonical1 == canonical2
426+
427+
def test_normalizes_role_case(self) -> None:
428+
upper = canonicalize_accessibility_node(
429+
{"role": "BUTTON", "name": "Click", "children": []}
430+
)
431+
lower = canonicalize_accessibility_node(
432+
{"role": "button", "name": "Click", "children": []}
433+
)
434+
435+
assert upper.role == lower.role
436+
assert upper.role == "button"
437+
438+
def test_normalizes_name_whitespace_and_case(self) -> None:
439+
node1 = canonicalize_accessibility_node(
440+
{"role": "button", "name": " Click Me ", "children": []}
441+
)
442+
node2 = canonicalize_accessibility_node(
443+
{"role": "button", "name": "click me", "children": []}
444+
)
445+
446+
assert node1.name_norm == node2.name_norm
447+
assert node1.name_norm == "click me"
448+
449+
def test_handles_empty_children_list(self) -> None:
450+
node = canonicalize_accessibility_node(
451+
{"role": "button", "name": "Test", "children": []}
452+
)
453+
assert node.children == ()
454+
455+
def test_handles_missing_children(self) -> None:
456+
node = canonicalize_accessibility_node({"role": "button", "name": "Test"})
457+
assert node.children == ()
458+
459+
def test_handles_none_name(self) -> None:
460+
node = canonicalize_accessibility_node({"role": "button", "name": None, "children": []})
461+
assert node.name_norm == ""
462+
463+
def test_produces_identical_desktop_hashes_for_same_content(self) -> None:
464+
snap1 = {
465+
"app_name": " FIREFOX ",
466+
"window_title": " GitHub - Pull Requests ",
467+
"focused_role": "BUTTON",
468+
"focused_name": " MERGE ",
469+
}
470+
snap2 = {
471+
"app_name": "firefox",
472+
"window_title": "github - pull requests",
473+
"focused_role": "button",
474+
"focused_name": "merge",
475+
}
476+
477+
assert compute_desktop_state_hash(snap1) == compute_desktop_state_hash(snap2)
478+
479+
def test_sorts_nested_children_deterministically(self) -> None:
480+
tree = {
481+
"role": "window",
482+
"children": [
483+
{
484+
"role": "panel",
485+
"name": "B",
486+
"children": [
487+
{"role": "button", "name": "Z", "children": []},
488+
{"role": "button", "name": "A", "children": []},
489+
],
490+
},
491+
{
492+
"role": "panel",
493+
"name": "A",
494+
"children": [
495+
{"role": "link", "name": "Y", "children": []},
496+
{"role": "link", "name": "X", "children": []},
497+
],
498+
},
499+
],
500+
}
501+
502+
canonical = canonicalize_accessibility_node(tree)
503+
504+
# First-level: panel A should come before panel B
505+
assert canonical.children[0].name_norm == "a"
506+
assert canonical.children[1].name_norm == "b"
507+
508+
# Second-level: within panel A, link X should come before link Y
509+
assert canonical.children[0].children[0].name_norm == "x"
510+
assert canonical.children[0].children[1].name_norm == "y"
511+
512+
# Within panel B, button A should come before button Z
513+
assert canonical.children[1].children[0].name_norm == "a"
514+
assert canonical.children[1].children[1].name_norm == "z"
515+
516+
class TestTerminalHashStability:
517+
"""Terminal hash stability tests."""
518+
519+
def test_identical_hashes_for_varying_whitespace(self) -> None:
520+
snap1 = {"session_id": "s1", "command": " npm run build "}
521+
snap2 = {"session_id": "s1", "command": "npm run build"}
522+
523+
assert compute_terminal_state_hash(snap1) == compute_terminal_state_hash(snap2)
524+
525+
def test_identical_hashes_for_transcripts_with_ansi_removed(self) -> None:
526+
snap1 = {
527+
"session_id": "s1",
528+
"command": "test",
529+
"transcript": "\x1b[32m✓\x1b[0m Tests passed",
530+
}
531+
snap2 = {
532+
"session_id": "s1",
533+
"command": "test",
534+
"transcript": "✓ Tests passed",
535+
}
536+
537+
assert compute_terminal_state_hash(snap1) == compute_terminal_state_hash(snap2)
538+
539+
def test_different_hashes_for_different_commands(self) -> None:
540+
snap1 = {"session_id": "s1", "command": "npm install"}
541+
snap2 = {"session_id": "s1", "command": "npm update"}
542+
543+
assert compute_terminal_state_hash(snap1) != compute_terminal_state_hash(snap2)
544+
545+
def test_different_hashes_for_different_session_ids(self) -> None:
546+
snap1 = {"session_id": "session-1", "command": "test"}
547+
snap2 = {"session_id": "session-2", "command": "test"}
548+
549+
assert compute_terminal_state_hash(snap1) != compute_terminal_state_hash(snap2)
550+
551+
def test_handles_timestamps_in_transcripts(self) -> None:
552+
snap1 = {"session_id": "s1", "transcript": "Build completed at 10:30:45"}
553+
snap2 = {"session_id": "s1", "transcript": "Build completed at 14:22:01"}
554+
555+
assert compute_terminal_state_hash(snap1) == compute_terminal_state_hash(snap2)

0 commit comments

Comments
 (0)