Skip to content

Commit d11bf63

Browse files
committed
feat: hides headings by default on encoding
1 parent bdbda41 commit d11bf63

11 files changed

Lines changed: 251 additions & 114 deletions

File tree

src/agon/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
A self-describing, token-efficient data interchange format optimized for LLMs.
44
"""
55

6-
from agon.core import AGON, EncodingResult, Format
6+
from agon.core import AGON, AGONEncoding, Format
77
from agon.errors import (
88
AGONColumnsError,
99
AGONError,
@@ -14,10 +14,10 @@
1414
__all__ = [
1515
"AGON",
1616
"AGONColumnsError",
17+
"AGONEncoding",
1718
"AGONError",
1819
"AGONStructError",
1920
"AGONTextError",
20-
"EncodingResult",
2121
"Format",
2222
]
2323
__version__ = "0.1.0"

src/agon/core.py

Lines changed: 110 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from __future__ import annotations
1414

1515
from dataclasses import dataclass
16-
from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast
16+
from typing import TYPE_CHECKING, Any, ClassVar, Literal, cast, overload
1717

1818
if TYPE_CHECKING:
1919
from collections.abc import Callable # pragma: no cover
@@ -25,14 +25,44 @@
2525
from agon.formats import AGONColumns, AGONFormat, AGONStruct, AGONText
2626

2727
Format = Literal["auto", "json", "text", "columns", "struct"]
28+
ConcreteFormat = Literal["json", "text", "columns", "struct"]
2829

2930

3031
@dataclass(frozen=True)
31-
class EncodingResult:
32-
"""Result of AGON encoding with format metadata."""
32+
class AGONEncoding:
33+
r"""Result of AGON encoding with format metadata.
34+
35+
Use directly in LLM prompts - str() returns the encoded text.
36+
37+
Example:
38+
>>> result = AGON.encode(data)
39+
>>> prompt = f"Analyze this data:\n{result}" # uses __str__
40+
>>> len(result) # character count
41+
>>> AGON.decode(response, format=result.format)
42+
"""
3343

3444
format: Format
3545
text: str
46+
header: str = ""
47+
48+
def __str__(self) -> str:
49+
"""Return encoded text for use in prompts."""
50+
return self.text
51+
52+
def __len__(self) -> int:
53+
"""Return character count of encoded text."""
54+
return len(self.text)
55+
56+
def __repr__(self) -> str:
57+
"""Return debug representation."""
58+
preview = self.text[:50] + "..." if len(self.text) > 50 else self.text
59+
return f"AGONEncoding(format={self.format!r}, len={len(self.text)}, text={preview!r})"
60+
61+
def with_header(self) -> str:
62+
"""Return encoded text with header prepended (for auto-detect decoding)."""
63+
if not self.header:
64+
return self.text
65+
return f"{self.header}\n\n{self.text}"
3666

3767

3868
class AGON:
@@ -54,12 +84,20 @@ class AGON:
5484
- Self-describing: no training or config required.
5585
"""
5686

57-
# Format registries
58-
_encoders: ClassVar[dict[str, Callable[[Any], str]]] = {
87+
# Format headers (for decoding)
88+
_headers: ClassVar[dict[ConcreteFormat, str]] = {
89+
"json": "",
90+
"text": "@AGON text",
91+
"columns": "@AGON columns",
92+
"struct": "@AGON struct",
93+
}
94+
95+
# Format registries (encode without headers - headers added separately)
96+
_encoders: ClassVar[dict[ConcreteFormat, Callable[[Any], str]]] = {
5997
"json": lambda data: orjson.dumps(data).decode(),
60-
"text": AGONText.encode,
61-
"columns": AGONColumns.encode,
62-
"struct": AGONStruct.encode,
98+
"text": lambda data: AGONText.encode(data, include_header=False),
99+
"columns": lambda data: AGONColumns.encode(data, include_header=False),
100+
"struct": lambda data: AGONStruct.encode(data, include_header=False),
63101
}
64102

65103
_decoders: ClassVar[dict[str, Callable[[str], Any]]] = {
@@ -70,17 +108,17 @@ class AGON:
70108

71109
@staticmethod
72110
def encode(
73-
data: Any,
111+
data: object,
74112
*,
75113
format: Format = "auto",
76114
force: bool = False,
77115
min_savings: float = 0.10,
78116
encoding: str = DEFAULT_ENCODING,
79-
) -> str:
117+
) -> AGONEncoding:
80118
"""Encode data to the most token-efficient AGON format.
81119
82120
Args:
83-
data: Data to encode. Any JSON-serializable value.
121+
data: Data to encode. Must be JSON-serializable.
84122
format: Format to use:
85123
- "auto": Select best format based on token count (default)
86124
- "json": Raw JSON
@@ -92,58 +130,30 @@ def encode(
92130
encoding: Tiktoken encoding for token counting (default: o200k_base).
93131
94132
Returns:
95-
Encoded string in the selected format.
133+
AGONEncoding containing:
134+
- format: The format used
135+
- text: Encoded data (send this to LLMs)
136+
- header: Format header (for decoding with auto-detect)
96137
97138
Example:
98139
>>> data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
99-
>>> AGON.encode(data, format="text")
100-
"""
101-
# Direct format dispatch
102-
if encoder := AGON._encoders.get(format):
103-
return encoder(data)
104-
105-
# format == "auto": select best
106-
candidates = [
107-
(fmt, encoder(data))
108-
for fmt, encoder in AGON._encoders.items()
109-
if force is False or fmt != "json"
110-
]
111-
112-
# Select smallest token count
113-
token_counts = [count_tokens(text, encoding=encoding) for _, text in candidates]
114-
best_idx = min(range(len(candidates)), key=lambda i: token_counts[i])
115-
best_format, best_text = candidates[best_idx]
116-
117-
# Apply min_savings threshold
118-
if not force and best_format != "json":
119-
json_idx = next(i for i, (fmt, _) in enumerate(candidates) if fmt == "json")
120-
json_tokens = token_counts[json_idx]
121-
savings = 1.0 - (token_counts[best_idx] / max(1, json_tokens))
122-
if savings < min_savings:
123-
return candidates[json_idx][1]
124-
125-
return best_text
126-
127-
@staticmethod
128-
def encode_with_format(
129-
data: Any,
130-
*,
131-
format: Format = "auto",
132-
force: bool = False,
133-
min_savings: float = 0.10,
134-
encoding: str = DEFAULT_ENCODING,
135-
) -> EncodingResult:
136-
"""Encode data and return result with format metadata.
137-
138-
Same as encode() but returns an EncodingResult with format info.
140+
>>> result = AGON.encode(data)
141+
>>> response = send_to_llm(f"Analyze: {result}") # uses __str__
142+
>>> AGON.decode(response, format=result.format) # decode using same format
139143
"""
140144
# Direct format dispatch
141-
if encoder := AGON._encoders.get(format):
142-
return EncodingResult(format, encoder(data))
145+
if format != "auto":
146+
text = AGON._encoders[format](data)
147+
header = AGON._headers[format]
148+
return AGONEncoding(format, text, header)
143149

144150
# format == "auto"
145151
candidates = [
146-
EncodingResult(cast("Format", fmt), encoder(data))
152+
AGONEncoding(
153+
cast("Format", fmt),
154+
encoder(data),
155+
AGON._headers.get(fmt, ""),
156+
)
147157
for fmt, encoder in AGON._encoders.items()
148158
if force is False or fmt != "json"
149159
]
@@ -162,31 +172,64 @@ def encode_with_format(
162172

163173
return best
164174

175+
@overload
165176
@staticmethod
166-
def decode(payload: str) -> Any:
167-
"""Decode an AGON-encoded payload.
177+
def decode(payload: AGONEncoding) -> Any: ...
168178

169-
Automatically detects the format by prefix matching.
179+
@overload
180+
@staticmethod
181+
def decode(payload: str, format: Format | None = None) -> Any: ...
182+
183+
@staticmethod
184+
def decode(
185+
payload: str | AGONEncoding,
186+
format: Format | None = None,
187+
) -> Any:
188+
"""Decode an AGON-encoded payload.
170189
171190
Args:
172-
payload: Encoded string in any AGON format.
191+
payload: What to decode. Can be:
192+
- AGONEncoding: Decode using its text and format
193+
- str: Encoded string (use format param or auto-detect)
194+
format: Format to use (only for str payload). If None or "auto", auto-detects from the header prefix and falls back to JSON.
173195
174196
Returns:
175197
Decoded Python value.
176198
177199
Raises:
178200
AGONError: If the payload is invalid.
179-
"""
180-
payload = payload.strip()
181201
182-
# Prefix-based decoder dispatch
183-
for prefix, decoder in AGON._decoders.items():
184-
if payload.startswith(prefix):
185-
return decoder(payload)
202+
Example:
203+
>>> result = AGON.encode(data)
204+
>>> AGON.decode(result) # decode AGONEncoding directly
205+
"""
206+
if isinstance(payload, AGONEncoding):
207+
format, payload = payload.format, payload.text
208+
209+
text = payload.strip()
210+
211+
# Auto-detect from header prefix
212+
if format is None or format == "auto":
213+
for prefix, decoder in AGON._decoders.items():
214+
if text.startswith(prefix):
215+
return decoder(text)
216+
return AGON._decode_json(text)
217+
218+
# Dispatch by format
219+
match format:
220+
case "json":
221+
return AGON._decode_json(text)
222+
case "text" | "columns" | "struct":
223+
header = AGON._headers[cast("ConcreteFormat", format)]
224+
if not text.startswith(header):
225+
text = f"{header}\n\n{text}"
226+
return AGON._decoders[header](text)
186227

187-
# Fallback: raw JSON
228+
@staticmethod
229+
def _decode_json(text: str) -> object:
230+
"""Decode JSON text."""
188231
try:
189-
return orjson.loads(payload)
232+
return orjson.loads(text)
190233
except orjson.JSONDecodeError as e:
191234
raise AGONError(f"Invalid JSON: {e}") from e
192235

src/agon/formats/base.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,18 @@ class AGONFormat(ABC):
1515
- hint() -> str
1616
"""
1717

18+
@staticmethod
19+
@abstractmethod
20+
def encode(data: object, *, include_header: bool = False) -> str:
21+
"""Encode data to this format."""
22+
...
23+
24+
@staticmethod
25+
@abstractmethod
26+
def decode(payload: str) -> object:
27+
"""Decode a payload in this format."""
28+
...
29+
1830
@staticmethod
1931
@abstractmethod
2032
def hint() -> str:

src/agon/formats/columns.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def hint() -> str:
5858

5959
@staticmethod
6060
def encode(
61-
data: Any,
61+
data: object,
6262
*,
6363
delimiter: str = DEFAULT_DELIMITER,
6464
include_header: bool = True,

src/agon/formats/struct.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def hint() -> str:
6969

7070
@staticmethod
7171
def encode(
72-
data: Any,
72+
data: object,
7373
*,
7474
include_header: bool = True,
7575
min_occurrences: int = 3,
@@ -101,7 +101,10 @@ def encode(
101101
lines.append(HEADER)
102102
lines.append("")
103103

104-
# Emit struct definitions
104+
# Emit struct definitions even when headers are disabled.
105+
# The header is used for auto-detect decoding, but LLM prompts need
106+
# the struct templates to interpret instances like FR(v1, v2).
107+
if struct_defs:
105108
for name, fields, optional, parents in struct_defs:
106109
fields_str = ", ".join(f + "?" if f in optional else f for f in fields)
107110
if parents:
@@ -110,8 +113,7 @@ def encode(
110113
else:
111114
lines.append(f"@{name}: {fields_str}")
112115

113-
if struct_defs:
114-
lines.append("")
116+
lines.append("")
115117

116118
_encode_value(data, lines, depth=0, registry=registry)
117119

@@ -200,7 +202,7 @@ def _register_struct(
200202

201203

202204
def _detect_shapes(
203-
data: Any,
205+
data: object,
204206
shapes: Counter[tuple[str, ...]] | None = None,
205207
) -> Counter[tuple[str, ...]]:
206208
"""Detect repeated object shapes in data."""

src/agon/formats/text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def hint() -> str:
5151

5252
@staticmethod
5353
def encode(
54-
data: Any,
54+
data: object,
5555
*,
5656
delimiter: str = DEFAULT_DELIMITER,
5757
include_header: bool = True,

tests/test_benchmarks.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,12 @@ def test_fixture_benchmark(fixture_path: Path) -> None:
117117
format_results[fmt] = (tokens, savings, encode_ms, decode_ms)
118118

119119
# Test auto selection
120-
result = AGON.encode_with_format(records, format="auto")
120+
result = AGON.encode(records, format="auto")
121121
auto_tokens = count_tokens(result.text)
122122
auto_savings = (1 - auto_tokens / max(1, raw_tokens)) * 100
123123

124-
# Verify auto decode
125-
decoded = AGON.decode(result.text)
124+
# Verify auto decode (decode AGONEncoding directly)
125+
decoded = AGON.decode(result)
126126
assert normalize_floats(decoded) == normalize_floats(records), "auto roundtrip failed"
127127

128128
# Print results

0 commit comments

Comments
 (0)