Skip to content

Commit 40e911f

Browse files
committed
fix: handle ':' in struct encode and add corresponding tests
1 parent d11bf63 commit 40e911f

3 files changed

Lines changed: 215 additions & 1 deletion

File tree

src/agon/formats/struct.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,8 @@ def _needs_quoting(s: str) -> bool:
362362
if s != s.strip():
363363
return True
364364
# Quote if contains special chars
365-
return "," in s or "(" in s or ")" in s or "\\" in s or "\n" in s or '"' in s
365+
# ':' is included to avoid ambiguity with inline key-value parsing in lists.
366+
return "," in s or ":" in s or "(" in s or ")" in s or "\\" in s or "\n" in s or '"' in s
366367

367368

368369
def _quote_string(s: str) -> str:
@@ -857,6 +858,14 @@ def _decode_array(
857858
idx += 1
858859
continue
859860

861+
# If this is a quoted string list item, treat it as a primitive.
862+
# This avoids ambiguity with inline object syntax when the string
863+
# contains ':' (e.g. "keyword match: foo").
864+
if content.startswith('"') and content.endswith('"'):
865+
result.append(_parse_primitive(content))
866+
idx += 1
867+
continue
868+
860869
kv = KEY_VALUE_RE.match(content)
861870
if kv:
862871
obj, idx = _decode_list_item_object(lines, idx, base_depth, registry, lenient)

tests/data/scars.json

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
{
2+
"success": true,
3+
"count": 5,
4+
"scars": [
5+
{
6+
"id": "83db87c7-8a40-493c-aee7-b06a5626a710",
7+
"title": "Use distinct representations for None vs empty string vs empty object",
8+
"lesson": "In text formats, `key: ` (empty after colon) is ambiguous - could mean None, empty string, or start of nested object. Use explicit `null` for None, `\"\"` for empty string, and only use empty-after-colon for nested structures. Otherwise decoder can't distinguish and may return wrong type.",
9+
"severity": "MAJOR",
10+
"tier": "fresh",
11+
"memory_type": "episodic",
12+
"confidence": "50%",
13+
"confidence_source": "solution_verified",
14+
"match_score": 1.411,
15+
"match_reasons": [
16+
"moderate similarity (63%)",
17+
"keyword match: for, object, return",
18+
"language match",
19+
"recent/fresh memory"
20+
],
21+
"match_details": {
22+
"similarity": 0.632,
23+
"trigger_boost": 1.3,
24+
"context_boost": 1.15,
25+
"keyword_matches": [
26+
"for",
27+
"object",
28+
"return",
29+
"Encode"
30+
],
31+
"error_code_matches": [],
32+
"project_match": false,
33+
"language_match": true,
34+
"tag_matches": []
35+
}
36+
},
37+
{
38+
"id": "72340b0a-9e72-4486-9361-a0eebdd627f3",
39+
"title": "Quote strings with leading/trailing whitespace for roundtrip",
40+
"lesson": "Strings with leading or trailing whitespace must be quoted during encoding because decoders typically call `.strip()` on values. Without quoting, `\"Name \"` becomes `\"Name\"` after roundtrip. Add check: `if s != s.strip(): return quote(s)`",
41+
"severity": "MODERATE",
42+
"tier": "fresh",
43+
"memory_type": "episodic",
44+
"confidence": "50%",
45+
"confidence_source": "solution_verified",
46+
"match_score": 1.381,
47+
"match_reasons": [
48+
"moderate similarity (62%)",
49+
"keyword match: for, return, python",
50+
"language match",
51+
"recent/fresh memory"
52+
],
53+
"match_details": {
54+
"similarity": 0.616,
55+
"trigger_boost": 1.3,
56+
"context_boost": 1.15,
57+
"keyword_matches": [
58+
"for",
59+
"return",
60+
"python"
61+
],
62+
"error_code_matches": [],
63+
"project_match": false,
64+
"language_match": true,
65+
"tag_matches": []
66+
}
67+
},
68+
{
69+
"id": "d09e1c17-86aa-47e2-98f2-58919e97d4e4",
70+
"title": "Prefer clean, concise Python over verbose implementations",
71+
"lesson": "Write clean, elegant Python code. Use Pydantic's native iteration (`for field, value in model`), walrus operators, and built-in features. Avoid verbose patterns like explicit field listing when iteration works. Trust Python's capabilities.",
72+
"severity": "MODERATE",
73+
"tier": "consolidated",
74+
"memory_type": "semantic",
75+
"confidence": "100%",
76+
"confidence_source": "user_confirmed",
77+
"match_score": 1.088,
78+
"match_reasons": [
79+
"moderate similarity (64%)",
80+
"keyword match: py, Python, for",
81+
"language match",
82+
"recent/fresh memory",
83+
"high confidence"
84+
],
85+
"match_details": {
86+
"similarity": 0.639,
87+
"trigger_boost": 1.3,
88+
"context_boost": 1.15,
89+
"keyword_matches": [
90+
"py",
91+
"Python",
92+
"for"
93+
],
94+
"error_code_matches": [],
95+
"project_match": false,
96+
"language_match": true,
97+
"tag_matches": []
98+
}
99+
},
100+
{
101+
"id": "f8d66345-4af2-49f1-90dc-1a99ece0b073",
102+
"title": "Antipattern: Verbose if-checks for optional field appending",
103+
"lesson": "When building text from optional Pydantic/dataclass fields, iterate over fields instead of manual if-checks. Use getattr() or model iteration to avoid repetitive conditionals.",
104+
"severity": "MODERATE",
105+
"tier": "consolidated",
106+
"memory_type": "semantic",
107+
"confidence": "100%",
108+
"confidence_source": "user_confirmed",
109+
"match_score": 1.07,
110+
"match_reasons": [
111+
"moderate similarity (61%)",
112+
"keyword match: g, for, optional",
113+
"language match",
114+
"tag match: dataclass",
115+
"recent/fresh memory",
116+
"high confidence"
117+
],
118+
"match_details": {
119+
"similarity": 0.607,
120+
"trigger_boost": 1.3,
121+
"context_boost": 1.26,
122+
"keyword_matches": [
123+
"g",
124+
"for",
125+
"optional"
126+
],
127+
"error_code_matches": [],
128+
"project_match": false,
129+
"language_match": true,
130+
"tag_matches": [
131+
"dataclass"
132+
]
133+
}
134+
},
135+
{
136+
"id": "1b8e4413-70ba-4194-b15e-5b4f021430ae",
137+
"title": "Use __repr__ and __str__ for model self-formatting (clean architecture)",
138+
"lesson": "Models should know how to format themselves. Implement `__repr__` and `__str__` methods on Pydantic models, then just call `repr(model)` or `str(model)` where needed. This is clean, follows single responsibility, and is more maintainable than external formatting logic.",
139+
"severity": "MODERATE",
140+
"tier": "consolidated",
141+
"memory_type": "semantic",
142+
"confidence": "100%",
143+
"confidence_source": "user_confirmed",
144+
"match_score": 0.96,
145+
"match_reasons": [
146+
"moderate similarity (62%)",
147+
"keyword match: g, format, function",
148+
"language match",
149+
"recent/fresh memory",
150+
"high confidence"
151+
],
152+
"match_details": {
153+
"similarity": 0.624,
154+
"trigger_boost": 1.3,
155+
"context_boost": 1.15,
156+
"keyword_matches": [
157+
"g",
158+
"format",
159+
"function"
160+
],
161+
"error_code_matches": [],
162+
"project_match": false,
163+
"language_match": true,
164+
"tag_matches": []
165+
}
166+
}
167+
],
168+
"formatted_warning": "## PAST MISTAKES - REVIEW BEFORE PROCEEDING\n\n### [MAJOR] Use distinct representations for None vs empty string vs empty object\n**Lesson**: In text formats, `key: ` (empty after colon) is ambiguous - could mean None, empty string, or start of nested object. Use explicit `null` for None, `\"\"` for empty string, and only use empty-after-colon for nested structures. Otherwise decoder can't distinguish and may return wrong type.\n**Solution**: Encode None as \"null\" in key-value contexts, empty string as '\"\"' (quoted empty). Only use blank after colon when followed by indented nested content.\n*Confidence: 50% (verified) | Context: python, agon*\n\n### [MODERATE] Quote strings with leading/trailing whitespace for roundtrip\n**Lesson**: Strings with leading or trailing whitespace must be quoted during encoding because decoders typically call `.strip()` on values. Without quoting, `\"Name \"` becomes `\"Name\"` after roundtrip. Add check: `if s != s.strip(): return quote(s)`\n**Solution**: In _needs_quoting(), add: `if s != s.strip(): return True` to force quoting strings with leading/trailing whitespace.\n*Confidence: 50% (verified) | Context: python, agon*\n\n### [MODERATE] Prefer clean, concise Python over verbose implementations\n**Lesson**: Write clean, elegant Python code. Use Pydantic's native iteration (`for field, value in model`), walrus operators, and built-in features. Avoid verbose patterns like explicit field listing when iteration works. Trust Python's capabilities.\n**Solution**: Recognized that Pydantic BaseModel IS iterable. Kept the clean implementation using `for _, value in self:` pattern. Used `__repr__` and `__str__` methods for self-formatting. Single responsibility principle - models format themselves.\n*Confidence: 100% (verified) | Prevented: 4x | Context: python, scars*\n\n### [MODERATE] Antipattern: Verbose if-checks for optional field appending\n**Lesson**: When building text from optional Pydantic/dataclass fields, iterate over fields instead of manual if-checks. Use getattr() or model iteration to avoid repetitive conditionals.\n**Solution**: Use field iteration or dictionary comprehension to filter non-None/non-empty values, then format them programmatically\n*Confidence: 100% (verified) | Prevented: 2x | Context: python, scars*\n\n### [MODERATE] Use __repr__ and __str__ for model self-formatting (clean architecture)\n**Lesson**: Models should know how to format themselves. Implement `__repr__` and `__str__` methods on Pydantic models, then just call `repr(model)` or `str(model)` where needed. This is clean, follows single responsibility, and is more maintainable than external formatting logic.\n**Solution**: Keep formatting in the model via `__repr__` and `__str__`. Example: Scar uses `__repr__` for embedding representation, RetrievalQuery uses `__str__` for query formatting. Embedding generator just calls these methods. Clean separation of concerns.\n*Confidence: 100% (verified) | Prevented: 1x | Context: python, scars*\n\n---\n",
169+
"feedback_reminder": "After using these scars, please provide feedback using reinforce_scar: feedback_type='helpful' if the advice prevented an error, 'irrelevant' if it didn't apply, or 'incorrect' if it was wrong.",
170+
"pending_feedback": [
171+
{
172+
"scar_id": "83db87c7-8a40-493c-aee7-b06a5626a710",
173+
"title": "Use distinct representations for None vs empty string vs empty object",
174+
"is_speculative": false
175+
},
176+
{
177+
"scar_id": "72340b0a-9e72-4486-9361-a0eebdd627f3",
178+
"title": "Quote strings with leading/trailing whitespace for roundtrip",
179+
"is_speculative": false
180+
},
181+
{
182+
"scar_id": "d09e1c17-86aa-47e2-98f2-58919e97d4e4",
183+
"title": "Prefer clean, concise Python over verbose implementations",
184+
"is_speculative": false
185+
},
186+
{
187+
"scar_id": "f8d66345-4af2-49f1-90dc-1a99ece0b073",
188+
"title": "Antipattern: Verbose if-checks for optional field appending",
189+
"is_speculative": false
190+
},
191+
{
192+
"scar_id": "1b8e4413-70ba-4194-b15e-5b4f021430ae",
193+
"title": "Use __repr__ and __str__ for model self-formatting (clean architecture)",
194+
"is_speculative": false
195+
}
196+
]
197+
}

tests/test_struct.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,14 @@ def test_roundtrip_array_of_structs(self) -> None:
246246
decoded = AGONStruct.decode(encoded)
247247
assert decoded == data
248248

249+
def test_roundtrip_array_of_strings_with_colon(self) -> None:
250+
# quoted strings containing ':' must not be parsed as
251+
# inline key-value objects when they appear as list items.
252+
data = ["keyword match: for, object, return", "language match"]
253+
encoded = AGONStruct.encode(data)
254+
decoded = AGONStruct.decode(encoded)
255+
assert decoded == data
256+
249257

250258
class TestAGONStructEscaping:
251259
"""Tests for value escaping."""

0 commit comments

Comments
 (0)