Skip to content

Commit ca3cbf2

Browse files
committed
refactor: hint is now prescriptive
1 parent 40e911f commit ca3cbf2

5 files changed

Lines changed: 69 additions & 34 deletions

File tree

.pre-commit-config.yaml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ repos:
2323
args: [--fix, --exit-non-zero-on-fix]
2424
- id: ruff-format
2525

26-
- repo: https://github.com/DetachHead/basedpyright
27-
rev: v1.29.1
26+
- repo: local
2827
hooks:
2928
- id: basedpyright
30-
additional_dependencies: [tiktoken]
29+
name: basedpyright (uv)
30+
entry: uv run basedpyright
31+
language: system
32+
args: [src]
33+
pass_filenames: false
3134

3235
- repo: https://github.com/codespell-project/codespell
3336
rev: v2.4.1

src/agon/core.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -256,16 +256,48 @@ def project_data(data: list[dict[str, Any]], keep_paths: list[str]) -> list[dict
256256
return AGONFormat.project_data(data, keep_paths)
257257

258258
@staticmethod
259-
def hint() -> str:
260-
"""Optional short hint for LLMs about AGON format.
259+
def hint(result_or_format: AGONEncoding | ConcreteFormat) -> str:
260+
"""Get a prescriptive hint instructing LLMs how to generate AGON format.
261261
262-
Most LLMs can understand AGON without any hint since it's self-describing.
263-
This hint is ~24 tokens vs ~100+ tokens for traditional schema prompts.
262+
NOTE: LLMs have not been trained on AGON, so generation accuracy cannot
263+
be guaranteed. Use hints when asking LLMs to return AGON-formatted data,
264+
but validate the output. Prefer sending AGON to LLMs (reliable) over
265+
asking LLMs to generate AGON (experimental).
266+
267+
Args:
268+
result_or_format: AGONEncoding result or format name ("text", "columns",
269+
"struct", "json"). Returns generation instructions for that format.
264270
265271
Returns:
266-
A short hint string.
272+
A short prescriptive hint instructing how to generate the format.
273+
274+
Example:
275+
>>> result = AGON.encode(data, format="auto")
276+
>>> AGON.hint(result) # Generation instruction for selected format
277+
'Return in AGON text format: Start with @AGON text header, encode arrays as name[N]{fields} with tab-delimited rows'
278+
>>> AGON.hint("columns") # Generation instruction for columns format
279+
'Return in AGON columns format: Start with @AGON columns header, transpose arrays to name[N] with ├/└ field: val1, val2, ...'
267280
"""
268-
return AGONText.hint()
281+
# Extract format if AGONEncoding was passed
282+
format_name = (
283+
result_or_format.format
284+
if isinstance(result_or_format, AGONEncoding)
285+
else result_or_format
286+
)
287+
288+
# Return hint for specific format
289+
match format_name:
290+
case "text":
291+
return AGONText.hint()
292+
case "columns":
293+
return AGONColumns.hint()
294+
case "struct":
295+
return AGONStruct.hint()
296+
case "json":
297+
return "JSON: Standard compact JSON encoding"
298+
case _:
299+
msg = f"Unknown format: {format_name}"
300+
raise AGONError(msg)
269301

270302
@staticmethod
271303
def count_tokens(text: str, *, encoding: str = DEFAULT_ENCODING) -> int:

src/agon/formats/columns.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ class AGONColumns(AGONFormat):
5353

5454
@staticmethod
5555
def hint() -> str:
56-
"""Return a short hint describing this format for LLMs."""
57-
return "AGON columns: arrays as name[N] with ├/└ field: val1, val2, ..."
56+
"""Return a short hint instructing LLMs how to generate this format."""
57+
return "Return in AGON columns format: Start with @AGON columns header, transpose arrays to name[N] with ├/└ field: val1, val2, ..."
5858

5959
@staticmethod
6060
def encode(
@@ -69,7 +69,7 @@ def encode(
6969
Args:
7070
data: JSON-serializable data to encode.
7171
delimiter: Value delimiter within columns (default: ", ").
72-
include_header: Whether to include @AGON columns v1 header.
72+
include_header: Whether to include @AGON columns header.
7373
use_ascii: Use ASCII tree chars (|, `) instead of Unicode.
7474
7575
Returns:
@@ -210,7 +210,7 @@ def _encode_primitive(val: Any, delimiter: str) -> str:
210210
return "null"
211211
if isinstance(val, bool):
212212
return "true" if val else "false"
213-
if isinstance(val, (int, float)):
213+
if isinstance(val, int | float):
214214
if isinstance(val, float):
215215
if val != val: # NaN
216216
return ""
@@ -267,7 +267,7 @@ def _is_columnar_array(arr: list[Any]) -> tuple[bool, list[str]]:
267267

268268
for obj in arr:
269269
for v in obj.values():
270-
if isinstance(v, (dict, list)):
270+
if isinstance(v, dict | list):
271271
return False, []
272272

273273
key_order: list[str] = []
@@ -281,7 +281,7 @@ def _is_columnar_array(arr: list[Any]) -> tuple[bool, list[str]]:
281281

282282
def _is_primitive_array(arr: list[Any]) -> bool:
283283
"""Check if array contains only primitives."""
284-
return all(not isinstance(x, (dict, list)) for x in arr)
284+
return all(not isinstance(x, dict | list) for x in arr)
285285

286286

287287
def _encode_value(
@@ -295,7 +295,7 @@ def _encode_value(
295295
"""Encode a value, appending lines."""
296296
indent = INDENT * depth
297297

298-
if val is None or isinstance(val, (bool, int, float, str)):
298+
if val is None or isinstance(val, bool | int | float | str):
299299
if name:
300300
lines.append(f"{indent}{name}: {_encode_primitive(val, delimiter)}")
301301
else:
@@ -389,7 +389,7 @@ def _encode_list_item_object(
389389
if isinstance(v, dict):
390390
lines.append(f"{prefix}{k}:")
391391
for nk, nv in v.items():
392-
if isinstance(nv, (dict, list)):
392+
if isinstance(nv, dict | list):
393393
_encode_value(nv, lines, depth + 2, delimiter, nk, use_ascii)
394394
else:
395395
lines.append(f"{indent} {nk}: {_encode_primitive(nv, delimiter)}")
@@ -417,7 +417,7 @@ def _encode_object(
417417
indent = INDENT * depth
418418

419419
for k, v in obj.items():
420-
if isinstance(v, (dict, list)):
420+
if isinstance(v, dict | list):
421421
_encode_value(v, lines, depth, delimiter, k, use_ascii)
422422
else:
423423
lines.append(f"{indent}{k}: {_encode_primitive(v, delimiter)}")

src/agon/formats/struct.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ class AGONStruct(AGONFormat):
6464

6565
@staticmethod
6666
def hint() -> str:
67-
"""Return a short hint describing this format for LLMs."""
68-
return "AGON struct: @Struct: fields defines templates, Struct(v1, v2) instantiates"
67+
"""Return a short hint instructing LLMs how to generate this format."""
68+
return "Return in AGON struct format: Start with @AGON struct header, define templates as @Struct: fields, instantiate as Struct(v1, v2)"
6969

7070
@staticmethod
7171
def encode(
@@ -212,7 +212,7 @@ def _detect_shapes(
212212
if isinstance(data, dict):
213213
# Only count shapes with primitive values
214214
primitive_keys: tuple[str, ...] = tuple(
215-
sorted(k for k, v in data.items() if not isinstance(v, (dict, list)))
215+
sorted(k for k, v in data.items() if not isinstance(v, dict | list))
216216
)
217217
if len(primitive_keys) >= 2:
218218
shapes[primitive_keys] += 1
@@ -322,7 +322,7 @@ def _can_use_struct(obj: dict[str, Any], fields: list[str], optional: set[str])
322322
"""Check if an object can be encoded as a struct instance."""
323323
# Object must have only primitive values
324324
for v in obj.values():
325-
if isinstance(v, (dict, list)):
325+
if isinstance(v, dict | list):
326326
return False
327327

328328
# All required fields must be present
@@ -385,7 +385,7 @@ def _encode_primitive(val: Any, *, for_struct_instance: bool = False) -> str:
385385
return "" if for_struct_instance else "null"
386386
if isinstance(val, bool):
387387
return "true" if val else "false"
388-
if isinstance(val, (int, float)):
388+
if isinstance(val, int | float):
389389
if isinstance(val, float):
390390
if val != val: # NaN
391391
return "" if for_struct_instance else "null"
@@ -466,7 +466,7 @@ def _encode_value(
466466
"""Encode a value, appending lines."""
467467
indent = INDENT * depth
468468

469-
if val is None or isinstance(val, (bool, int, float, str)):
469+
if val is None or isinstance(val, bool | int | float | str):
470470
lines.append(f"{indent}{_encode_primitive(val)}")
471471
return
472472

src/agon/formats/text.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
It uses indentation for hierarchy and tabular format for arrays of objects.
55
66
Format structure:
7-
@AGON text v1
7+
@AGON text
88
@D=<delimiter> # optional, default: \t
99
<data>
1010
1111
Example:
12-
@AGON text v1
12+
@AGON text
1313
1414
products[3]{sku name price}
1515
A123 Widget 9.99
@@ -46,8 +46,8 @@ class AGONText(AGONFormat):
4646

4747
@staticmethod
4848
def hint() -> str:
49-
"""Return a short hint describing this format for LLMs."""
50-
return "AGON text: key:value pairs, arrays as name[N]{fields} with rows"
49+
"""Return a short hint instructing LLMs how to generate this format."""
50+
return "Return in AGON text format: Start with @AGON text header, encode arrays as name[N]{fields} with tab-delimited rows"
5151

5252
@staticmethod
5353
def encode(
@@ -61,7 +61,7 @@ def encode(
6161
Args:
6262
data: JSON-serializable data to encode.
6363
delimiter: Field delimiter for tabular data (default: tab).
64-
include_header: Whether to include @AGON text v1 header.
64+
include_header: Whether to include @AGON text header.
6565
6666
Returns:
6767
AGONText encoded string.
@@ -207,7 +207,7 @@ def _encode_primitive(val: Any, delimiter: str) -> str:
207207
return "null"
208208
if isinstance(val, bool):
209209
return "true" if val else "false"
210-
if isinstance(val, (int, float)):
210+
if isinstance(val, int | float):
211211
# Handle special float values
212212
if isinstance(val, float):
213213
if val != val: # NaN
@@ -272,7 +272,7 @@ def _is_uniform_array(arr: list[Any]) -> tuple[bool, list[str]]:
272272
# Check all objects have only primitive values
273273
for obj in arr:
274274
for v in obj.values():
275-
if isinstance(v, (dict, list)):
275+
if isinstance(v, dict | list):
276276
return False, []
277277

278278
# Return keys in consistent order (first seen order from union)
@@ -287,7 +287,7 @@ def _is_uniform_array(arr: list[Any]) -> tuple[bool, list[str]]:
287287

288288
def _is_primitive_array(arr: list[Any]) -> bool:
289289
"""Check if array contains only primitives."""
290-
return all(not isinstance(x, (dict, list)) for x in arr)
290+
return all(not isinstance(x, dict | list) for x in arr)
291291

292292

293293
def _encode_value(
@@ -300,7 +300,7 @@ def _encode_value(
300300
"""Encode a value, appending lines."""
301301
indent = INDENT * depth
302302

303-
if val is None or isinstance(val, (bool, int, float, str)):
303+
if val is None or isinstance(val, bool | int | float | str):
304304
# Primitive value
305305
if name:
306306
lines.append(f"{indent}{name}: {_encode_primitive(val, delimiter)}")
@@ -401,7 +401,7 @@ def _encode_list_item_object(
401401
# Nested object
402402
lines.append(f"{prefix}{k}:")
403403
for nk, nv in v.items():
404-
if isinstance(nv, (dict, list)):
404+
if isinstance(nv, dict | list):
405405
_encode_value(nv, lines, depth + 2, delimiter, nk)
406406
else:
407407
lines.append(f"{indent} {nk}: {_encode_primitive(nv, delimiter)}")

0 commit comments

Comments
 (0)