1313from __future__ import annotations
1414
1515from dataclasses import dataclass
16- from typing import TYPE_CHECKING , Any , ClassVar , Literal , cast
16+ from typing import TYPE_CHECKING , Any , ClassVar , Literal , cast , overload
1717
1818if TYPE_CHECKING :
1919 from collections .abc import Callable # pragma: no cover
2525from agon .formats import AGONColumns , AGONFormat , AGONStruct , AGONText
2626
2727Format = Literal ["auto" , "json" , "text" , "columns" , "struct" ]
28+ ConcreteFormat = Literal ["json" , "text" , "columns" , "struct" ]
2829
2930
@dataclass(frozen=True)
class AGONEncoding:
    r"""Result of AGON encoding with format metadata.

    Use directly in LLM prompts - ``str()`` returns the encoded text.

    Attributes:
        format: Name of the format the text was encoded with.
        text: Encoded text; this is what ``str()`` and ``len()`` operate on.
        header: Format header that ``with_header()`` prepends to ``text``.
            An empty string means there is no header to prepend.

    Example:
        >>> result = AGON.encode(data)
        >>> prompt = f"Analyze this data:\n{result}"  # uses __str__
        >>> len(result)  # character count
        >>> AGON.decode(response, format=result.format)
    """

    format: Format
    text: str
    header: str = ""

    def __str__(self) -> str:
        """Return encoded text for use in prompts."""
        return self.text

    def __len__(self) -> int:
        """Return character count of encoded text."""
        return len(self.text)

    def __repr__(self) -> str:
        """Return debug representation with the text truncated to 50 characters."""
        # Keep the repr short: long payloads are cut and marked with an ellipsis.
        preview = f"{self.text[:50]}..." if len(self.text) > 50 else self.text
        return f"AGONEncoding(format={self.format!r}, len={len(self.text)}, text={preview!r})"

    def with_header(self) -> str:
        """Return encoded text with header prepended (for auto-detect decoding).

        When ``header`` is empty the text is returned unchanged; otherwise the
        header and the text are joined by one blank line.
        """
        if not self.header:
            return self.text
        return f"{self.header}\n\n{self.text}"
3666
3767
3868class AGON :
@@ -54,12 +84,20 @@ class AGON:
5484 - Self-describing: no training or config required.
5585 """
5686
57- # Format registries
58- _encoders : ClassVar [dict [str , Callable [[Any ], str ]]] = {
87+ # Format headers (for decoding)
88+ _headers : ClassVar [dict [ConcreteFormat , str ]] = {
89+ "json" : "" ,
90+ "text" : "@AGON text" ,
91+ "columns" : "@AGON columns" ,
92+ "struct" : "@AGON struct" ,
93+ }
94+
95+ # Format registries (encode without headers - headers added separately)
96+ _encoders : ClassVar [dict [ConcreteFormat , Callable [[Any ], str ]]] = {
5997 "json" : lambda data : orjson .dumps (data ).decode (),
60- "text" : AGONText .encode ,
61- "columns" : AGONColumns .encode ,
62- "struct" : AGONStruct .encode ,
98+ "text" : lambda data : AGONText .encode ( data , include_header = False ) ,
99+ "columns" : lambda data : AGONColumns .encode ( data , include_header = False ) ,
100+ "struct" : lambda data : AGONStruct .encode ( data , include_header = False ) ,
63101 }
64102
65103 _decoders : ClassVar [dict [str , Callable [[str ], Any ]]] = {
@@ -70,17 +108,17 @@ class AGON:
70108
71109 @staticmethod
72110 def encode (
73- data : Any ,
111+ data : object ,
74112 * ,
75113 format : Format = "auto" ,
76114 force : bool = False ,
77115 min_savings : float = 0.10 ,
78116 encoding : str = DEFAULT_ENCODING ,
79- ) -> str :
117+ ) -> AGONEncoding :
80118 """Encode data to the most token-efficient AGON format.
81119
82120 Args:
83- data: Data to encode. Any JSON-serializable value .
121+ data: Data to encode. Must be JSON-serializable.
84122 format: Format to use:
85123 - "auto": Select best format based on token count (default)
86124 - "json": Raw JSON
@@ -92,58 +130,30 @@ def encode(
92130 encoding: Tiktoken encoding for token counting (default: o200k_base).
93131
94132 Returns:
95- Encoded string in the selected format.
133+ AGONEncoding containing:
134+ - format: The format used
135+ - text: Encoded data (send this to LLMs)
136+ - header: Format header (for decoding with auto-detect)
96137
97138 Example:
98139 >>> data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
99- >>> AGON.encode(data, format="text")
100- """
101- # Direct format dispatch
102- if encoder := AGON ._encoders .get (format ):
103- return encoder (data )
104-
105- # format == "auto": select best
106- candidates = [
107- (fmt , encoder (data ))
108- for fmt , encoder in AGON ._encoders .items ()
109- if force is False or fmt != "json"
110- ]
111-
112- # Select smallest token count
113- token_counts = [count_tokens (text , encoding = encoding ) for _ , text in candidates ]
114- best_idx = min (range (len (candidates )), key = lambda i : token_counts [i ])
115- best_format , best_text = candidates [best_idx ]
116-
117- # Apply min_savings threshold
118- if not force and best_format != "json" :
119- json_idx = next (i for i , (fmt , _ ) in enumerate (candidates ) if fmt == "json" )
120- json_tokens = token_counts [json_idx ]
121- savings = 1.0 - (token_counts [best_idx ] / max (1 , json_tokens ))
122- if savings < min_savings :
123- return candidates [json_idx ][1 ]
124-
125- return best_text
126-
127- @staticmethod
128- def encode_with_format (
129- data : Any ,
130- * ,
131- format : Format = "auto" ,
132- force : bool = False ,
133- min_savings : float = 0.10 ,
134- encoding : str = DEFAULT_ENCODING ,
135- ) -> EncodingResult :
136- """Encode data and return result with format metadata.
137-
138- Same as encode() but returns an EncodingResult with format info.
140+ >>> result = AGON.encode(data)
141+ >>> response = send_to_llm(f"Analyze: {result}") # uses __str__
142+ >>> AGON.decode(response, format=result.format) # decode using same format
139143 """
140144 # Direct format dispatch
141- if encoder := AGON ._encoders .get (format ):
142- return EncodingResult (format , encoder (data ))
145+ if format != "auto" :
146+ text = AGON ._encoders [format ](data )
147+ header = AGON ._headers [format ]
148+ return AGONEncoding (format , text , header )
143149
144150 # format == "auto"
145151 candidates = [
146- EncodingResult (cast ("Format" , fmt ), encoder (data ))
152+ AGONEncoding (
153+ cast ("Format" , fmt ),
154+ encoder (data ),
155+ AGON ._headers .get (fmt , "" ),
156+ )
147157 for fmt , encoder in AGON ._encoders .items ()
148158 if force is False or fmt != "json"
149159 ]
@@ -162,31 +172,64 @@ def encode_with_format(
162172
163173 return best
164174
175+ @overload
165176 @staticmethod
166- def decode (payload : str ) -> Any :
167- """Decode an AGON-encoded payload.
177+ def decode (payload : AGONEncoding ) -> Any : ...
168178
169- Automatically detects the format by prefix matching.
179+ @overload
180+ @staticmethod
181+ def decode (payload : str , format : Format | None = None ) -> Any : ...
182+
183+ @staticmethod
184+ def decode (
185+ payload : str | AGONEncoding ,
186+ format : Format | None = None ,
187+ ) -> Any :
188+ """Decode an AGON-encoded payload.
170189
171190 Args:
172- payload: Encoded string in any AGON format.
191+ payload: What to decode. Can be:
192+ - AGONEncoding: Decode using its text and format
193+ - str: Encoded string (use format param or auto-detect)
194+ format: Format to use (only for str payload). If None, auto-detects.
173195
174196 Returns:
175197 Decoded Python value.
176198
177199 Raises:
178200 AGONError: If the payload is invalid.
179- """
180- payload = payload .strip ()
181201
182- # Prefix-based decoder dispatch
183- for prefix , decoder in AGON ._decoders .items ():
184- if payload .startswith (prefix ):
185- return decoder (payload )
202+ Example:
203+ >>> result = AGON.encode(data)
204+ >>> AGON.decode(result) # decode AGONEncoding directly
205+ """
206+ if isinstance (payload , AGONEncoding ):
207+ format , payload = payload .format , payload .text
208+
209+ text = payload .strip ()
210+
211+ # Auto-detect from header prefix
212+ if format is None or format == "auto" :
213+ for prefix , decoder in AGON ._decoders .items ():
214+ if text .startswith (prefix ):
215+ return decoder (text )
216+ return AGON ._decode_json (text )
217+
218+ # Dispatch by format
219+ match format :
220+ case "json" :
221+ return AGON ._decode_json (text )
222+ case "text" | "columns" | "struct" :
223+ header = AGON ._headers [cast ("ConcreteFormat" , format )]
224+ if not text .startswith (header ):
225+ text = f"{ header } \n \n { text } "
226+ return AGON ._decoders [header ](text )
186227
187- # Fallback: raw JSON
@staticmethod
def _decode_json(text: str) -> object:
    """Parse ``text`` as JSON.

    Returns:
        The value produced by ``orjson.loads``.

    Raises:
        AGONError: If ``orjson`` rejects the text as invalid JSON; the
            original decode error is chained as the cause.
    """
    try:
        parsed = orjson.loads(text)
    except orjson.JSONDecodeError as exc:
        raise AGONError(f"Invalid JSON: {exc}") from exc
    return parsed
192235
0 commit comments