Skip to content

Commit 59165ec

Browse files
committed
fix: change default delimiter from comma to tab in AGONColumns format
1 parent 4f491a1 commit 59165ec

2 files changed

Lines changed: 190 additions & 67 deletions

File tree

src/agon/formats/columns.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
"""AGONColumns format codec.
1+
r"""AGONColumns format codec.
22
33
AGONColumns is a columnar encoding that transposes data to group by column (type)
44
instead of row. This provides better token efficiency for wide tables with
55
many columns of the same type.
66
77
Format structure:
88
@AGON columns
9-
@D=<delimiter> # optional, default: ,
9+
@D=<delimiter> # optional, default: \t
1010
<data>
1111
1212
Example:
@@ -27,7 +27,7 @@
2727
from agon.formats.base import AGONFormat
2828

2929
HEADER = "@AGON columns"
30-
DEFAULT_DELIMITER = ", "
30+
DEFAULT_DELIMITER = "\t"
3131
INDENT = " "
3232

3333
# Tree drawing characters

tests/test_columns.py

Lines changed: 187 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ def test_encode_decode_roundtrip_nested(self) -> None:
4343
decoded = AGONColumns.decode(encoded)
4444
assert decoded == data
4545

46+
def test_encode_falls_back_to_string_for_unknown_types(self) -> None:
47+
class Custom:
48+
def __str__(self) -> str: # pragma: no cover
49+
return "CUSTOM"
50+
51+
encoded = AGONColumns.encode({"x": Custom()})
52+
decoded = AGONColumns.decode(encoded)
53+
assert decoded == {"x": "CUSTOM"}
54+
4655

4756
class TestAGONColumnsColumnar:
4857
"""Tests for columnar array encoding (uniform objects)."""
@@ -54,15 +63,13 @@ def test_encode_columnar_array(self, simple_data: list[dict[str, Any]]) -> None:
5463
assert "└" in encoded or "`" in encoded
5564

5665
def test_decode_columnar_array(self) -> None:
57-
payload = textwrap.dedent(
58-
"""\
59-
@AGON columns
60-
61-
products[3]
62-
├ sku: A123, B456, C789
63-
├ name: Widget, Gadget, Gizmo
64-
└ price: 9.99, 19.99, 29.99
65-
"""
66+
payload = (
67+
"@AGON columns\n"
68+
"\n"
69+
"products[3]\n"
70+
"├ sku: A123\tB456\tC789\n"
71+
"├ name: Widget\tGadget\tGizmo\n"
72+
"└ price: 9.99\t19.99\t29.99\n"
6673
)
6774
decoded = AGONColumns.decode(payload)
6875
assert "products" in decoded
@@ -73,15 +80,13 @@ def test_decode_columnar_array(self) -> None:
7380
assert products[2] == {"sku": "C789", "name": "Gizmo", "price": 29.99}
7481

7582
def test_decode_columnar_array_unnamed(self) -> None:
76-
payload = textwrap.dedent(
77-
"""\
78-
@AGON columns
79-
80-
[3]
81-
├ sku: A123, B456, C789
82-
├ name: Widget, Gadget, Gizmo
83-
└ price: 9.99, 19.99, 29.99
84-
"""
83+
payload = (
84+
"@AGON columns\n"
85+
"\n"
86+
"[3]\n"
87+
"├ sku: A123\tB456\tC789\n"
88+
"├ name: Widget\tGadget\tGizmo\n"
89+
"└ price: 9.99\t19.99\t29.99\n"
8590
)
8691
decoded = AGONColumns.decode(payload)
8792
assert len(decoded) == 3
@@ -93,15 +98,13 @@ def test_roundtrip_columnar_array(self, simple_data: list[dict[str, Any]]) -> None:
9398
assert decoded == simple_data
9499

95100
def test_columnar_with_missing_values(self) -> None:
96-
payload = textwrap.dedent(
97-
"""\
98-
@AGON columns
99-
100-
users[3]
101-
├ id: 1, 2, 3
102-
├ name: Alice, Bob, Carol
103-
└ email: alice@example.com, , carol@example.com
104-
"""
101+
payload = (
102+
"@AGON columns\n"
103+
"\n"
104+
"users[3]\n"
105+
"├ id: 1\t2\t3\n"
106+
"├ name: Alice\tBob\tCarol\n"
107+
"└ email: alice@example.com\t\tcarol@example.com\n"
105108
)
106109
decoded = AGONColumns.decode(payload)
107110
users = decoded["users"]
@@ -119,21 +122,73 @@ def test_ascii_tree_chars(self) -> None:
119122
assert "└" not in encoded
120123

121124
def test_decode_ascii_tree_chars(self) -> None:
122-
payload = textwrap.dedent(
123-
"""\
124-
@AGON columns
125-
126-
users[2]
127-
| id: 1, 2
128-
` name: Alice, Bob
129-
"""
130-
)
125+
payload = "@AGON columns\n\nusers[2]\n| id: 1\t2\n` name: Alice\tBob\n"
131126
decoded = AGONColumns.decode(payload)
132127
users = decoded["users"]
133128
assert len(users) == 2
134129
assert users[0] == {"id": 1, "name": "Alice"}
135130
assert users[1] == {"id": 2, "name": "Bob"}
136131

132+
def test_decode_columnar_array_field_shorter_than_count(self) -> None:
133+
payload = "@AGON columns\n\nusers[2]\n└ id: 1\n"
134+
decoded = AGONColumns.decode(payload)
135+
assert decoded == {"users": [{"id": 1}, {}]}
136+
137+
def test_decode_columnar_array_null_cell_means_present_none(self) -> None:
138+
payload = "@AGON columns\n\nusers[2]\n└ email: null\t\n"
139+
decoded = AGONColumns.decode(payload)
140+
assert decoded == {"users": [{"email": None}, {}]}
141+
142+
def test_decode_columnar_array_escaped_quote_inside_cell(self) -> None:
143+
payload = '@AGON columns\n\nitems[2]\n└ s: "a\\"b"\t"c"\n'
144+
decoded = AGONColumns.decode(payload)
145+
assert decoded == {"items": [{"s": 'a"b'}, {"s": "c"}]}
146+
147+
148+
class TestAGONColumnsQuotingRoundtrip:
149+
"""Roundtrip tests for quoting/unquoting strings in columns format."""
150+
151+
def test_roundtrip_strings_requiring_quotes(self) -> None:
152+
data = [
153+
{"s": "123"},
154+
{"s": "null"},
155+
{"s": "@hello"},
156+
{"s": " spaced"},
157+
{"s": "a\tb"},
158+
{"s": r"a\\b"},
159+
{"s": "a\nline"},
160+
{"s": 'quote: "x"'},
161+
]
162+
encoded = AGONColumns.encode(data)
163+
decoded = AGONColumns.decode(encoded)
164+
assert decoded == data
165+
166+
167+
class TestAGONColumnsDirectives:
168+
"""Tests for @D= delimiter directive parsing."""
169+
170+
def test_decode_custom_delimiter_declaration(self) -> None:
171+
payload = '@AGON columns\n@D=\\n\n\nitems[1]\n└ s: "123"\n'
172+
decoded = AGONColumns.decode(payload)
173+
assert decoded == {"items": [{"s": "123"}]}
174+
175+
def test_decode_tab_delimiter_declaration(self) -> None:
176+
payload = '@AGON columns\n@D=\\t\n\nitems[2]\n└ s: "a"\t"b"\n'
177+
decoded = AGONColumns.decode(payload)
178+
assert decoded == {"items": [{"s": "a"}, {"s": "b"}]}
179+
180+
def test_encode_emits_delimiter_declaration_for_non_default(self) -> None:
181+
data = [{"id": 1}, {"id": 2}]
182+
encoded = AGONColumns.encode(data, delimiter=",", use_ascii=True)
183+
assert "@D=," in encoded
184+
decoded = AGONColumns.decode(encoded)
185+
assert decoded == data
186+
187+
def test_decode_custom_comma_delimiter_splits_quoted_values(self) -> None:
188+
payload = '@AGON columns\n@D=,\n\nitems[2]\n└ s: "a,b","c"\n'
189+
decoded = AGONColumns.decode(payload)
190+
assert decoded == {"items": [{"s": "a,b"}, {"s": "c"}]}
191+
137192

138193
class TestAGONColumnsPrimitiveArrays:
139194
"""Tests for primitive array encoding."""
@@ -144,13 +199,7 @@ def test_encode_primitive_array(self) -> None:
144199
assert "[3]:" in encoded
145200

146201
def test_decode_primitive_array(self) -> None:
147-
payload = textwrap.dedent(
148-
"""\
149-
@AGON columns
150-
151-
tags[4]: admin, ops, dev, user
152-
"""
153-
)
202+
payload = "@AGON columns\n\ntags[4]: admin\tops\tdev\tuser\n"
154203
decoded = AGONColumns.decode(payload)
155204
assert decoded == {"tags": ["admin", "ops", "dev", "user"]}
156205

@@ -187,6 +236,41 @@ def test_decode_list_array_with_objects(self) -> None:
187236
assert records[0] == {"name": "Alice", "age": 30}
188237
assert records[1] == {"name": "Bob", "age": 25}
189238

239+
def test_decode_list_array_with_primitives(self) -> None:
240+
payload = textwrap.dedent(
241+
"""\
242+
@AGON columns
243+
244+
items[3]:
245+
- 1
246+
- null
247+
- \"x\"
248+
"""
249+
)
250+
decoded = AGONColumns.decode(payload)
251+
assert decoded == {"items": [1, None, "x"]}
252+
253+
def test_decode_list_array_skips_blank_and_comment_lines(self) -> None:
254+
payload = textwrap.dedent(
255+
"""\
256+
@AGON columns
257+
258+
items[2]:
259+
# comment line
260+
- 1
261+
262+
- 2
263+
"""
264+
)
265+
decoded = AGONColumns.decode(payload)
266+
assert decoded == {"items": [1, 2]}
267+
268+
def test_roundtrip_list_item_object_with_nested_object(self) -> None:
269+
data = {"items": [{"id": 1, "meta": {"tags": ["a", "b"], "flag": True}}]}
270+
encoded = AGONColumns.encode(data)
271+
decoded = AGONColumns.decode(encoded)
272+
assert decoded == data
273+
190274

191275
class TestAGONColumnsPrimitives:
192276
"""Tests for primitive value handling."""
@@ -234,9 +318,10 @@ class TestAGONColumnsQuoting:
234318
"""Tests for string quoting rules."""
235319

236320
def test_quote_string_with_delimiter(self) -> None:
237-
data = {"text": "hello, world"}
321+
# Tab is the delimiter, so strings containing tabs need quoting
322+
data = {"text": "hello\tworld"}
238323
encoded = AGONColumns.encode(data)
239-
assert '"hello, world"' in encoded
324+
assert '"hello\\tworld"' in encoded
240325

241326
def test_quote_string_with_leading_space(self) -> None:
242327
data = {"text": " leading space"}
@@ -259,26 +344,29 @@ def test_roundtrip_quoted_strings(self) -> None:
259344
decoded = AGONColumns.decode(encoded)
260345
assert decoded == data
261346

347+
def test_decode_quoted_string_with_unknown_escape(self) -> None:
348+
payload = '@AGON columns\n\nv: "a\\q"\n'
349+
decoded = AGONColumns.decode(payload)
350+
assert decoded == {"v": "aq"}
351+
352+
def test_unquote_string_is_noop_for_unquoted_input(self) -> None:
353+
from agon.formats.columns import _unquote_string
354+
355+
assert _unquote_string("abc") == "abc"
356+
262357

263358
class TestAGONColumnsDelimiters:
264359
"""Tests for custom delimiters."""
265360

266-
def test_encode_with_tab_delimiter(self) -> None:
361+
def test_encode_with_comma_delimiter(self) -> None:
362+
# Tab is now the default, so test with comma to verify @D= is emitted
267363
data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
268-
encoded = AGONColumns.encode(data, delimiter="\t")
269-
assert "@D=\\t" in encoded
364+
encoded = AGONColumns.encode(data, delimiter=",")
365+
assert "@D=," in encoded
270366

271367
def test_decode_with_tab_delimiter(self) -> None:
272-
payload = textwrap.dedent(
273-
"""\
274-
@AGON columns
275-
@D=\\t
276-
277-
users[2]
278-
├ id: 1\t2
279-
└ name: Alice\tBob
280-
"""
281-
)
368+
# Tab is now the default, so no @D= needed
369+
payload = "@AGON columns\n\nusers[2]\n├ id: 1\t2\n└ name: Alice\tBob\n"
282370
decoded = AGONColumns.decode(payload)
283371
users = decoded["users"]
284372
assert len(users) == 2
@@ -309,8 +397,8 @@ def test_array_inside_object(self, nested_data: list[dict[str, Any]]) -> None:
309397
assert decoded == nested_data
310398

311399

312-
class TestAGONColumnsEdgeCases:
313-
"""Edge case tests."""
400+
class TestAGONColumnsEmptyAndStrings:
401+
"""Tests for empty values and string handling."""
314402

315403
def test_empty_array(self) -> None:
316404
data = {"items": []}
@@ -354,6 +442,30 @@ def test_wide_table(self) -> None:
354442
assert decoded == data
355443

356444

445+
class TestAGONColumnsArrays:
446+
"""Tests for array variants beyond pure columnar tables."""
447+
448+
def test_decode_primitive_array_empty_values(self) -> None:
449+
payload = "@AGON columns\n\nnums[0]: \n"
450+
decoded = AGONColumns.decode(payload)
451+
assert decoded == {"nums": []}
452+
453+
def test_decode_list_array_item_with_nested_primitive_array(self) -> None:
454+
payload = "@AGON columns\n\nitems[1]:\n - id: 1\n tags[2]: a\tb\n"
455+
decoded = AGONColumns.decode(payload)
456+
assert decoded == {"items": [{"id": 1, "tags": ["a", "b"]}]}
457+
458+
def test_decode_list_array_item_object_with_nested_object_value(self) -> None:
459+
payload = "@AGON columns\n\nitems[1]:\n - meta:\n a: 1\n"
460+
decoded = AGONColumns.decode(payload)
461+
assert decoded == {"items": [{"meta": {"a": 1}}]}
462+
463+
def test_decode_list_array_item_object_missing_nested_value_becomes_empty_object(self) -> None:
464+
payload = "@AGON columns\n\nitems[1]:\n - meta:\n"
465+
decoded = AGONColumns.decode(payload)
466+
assert decoded == {"items": [{"meta": {}}]}
467+
468+
357469
class TestAGONColumnsIntegration:
358470
"""Integration tests with AGON core."""
359471

@@ -389,6 +501,16 @@ def test_empty_payload(self) -> None:
389501
with pytest.raises(AGONColumnsError, match="Empty payload"):
390502
AGONColumns.decode("")
391503

504+
def test_cannot_parse_line_raises(self) -> None:
505+
payload = "@AGON columns\n\n???\n"
506+
with pytest.raises(AGONColumnsError, match=r"Cannot parse line"):
507+
AGONColumns.decode(payload)
508+
509+
def test_array_header_without_tree_lines_raises(self) -> None:
510+
payload = "@AGON columns\n\n[2]\nnot-a-tree\n"
511+
with pytest.raises(AGONColumnsError, match=r"Cannot parse line"):
512+
AGONColumns.decode(payload)
513+
392514

393515
class TestAGONColumnsHint:
394516
"""Test hint method."""
@@ -410,8 +532,8 @@ def test_repeated_values_in_column(self) -> None:
410532
{"status": "active", "type": "admin"},
411533
]
412534
encoded = AGONColumns.encode(data)
413-
# Values should be grouped by column
414-
assert "status: active, active, active" in encoded
535+
# Values should be grouped by column (tab-separated)
536+
assert "status: active\tactive\tactive" in encoded
415537
decoded = AGONColumns.decode(encoded)
416538
assert decoded == data
417539

@@ -423,7 +545,8 @@ def test_numeric_sequences(self) -> None:
423545
{"price": 29.99, "qty": 30},
424546
]
425547
encoded = AGONColumns.encode(data)
426-
assert "price: 9.99, 19.99, 29.99" in encoded
427-
assert "qty: 10, 20, 30" in encoded
548+
# Values should be tab-separated
549+
assert "price: 9.99\t19.99\t29.99" in encoded
550+
assert "qty: 10\t20\t30" in encoded
428551
decoded = AGONColumns.decode(encoded)
429552
assert decoded == data

0 commit comments

Comments
 (0)