File tree Expand file tree Collapse file tree
src/toon_format/langchain Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -120,6 +120,26 @@ tokens = count_tokens(toon_str) # Uses tiktoken (gpt5/gpt5-mini)
120120
121121** Type Normalization:** ` Infinity/NaN/Functions ` → ` null ` • ` Decimal ` → ` float ` • ` datetime ` → ISO 8601 • ` -0 ` → ` 0 `
122122
123+ ## LangChain Integration
124+
125+ Install with:
126+ ``` bash
127+ pip install " toon-python[langchain]"
128+ ```
129+ Adds a ** completely optional** LangChain integration via the ` [langchain] ` extra.
130+
131+ ### Features
132+ - ` ToonSerializer ` : ` Document ` → TOON (30-60 % token reduction)
133+ - ` ToonOutputParser ` : TOON response → Python object
134+ - Sync + async support
- Fully covered by unit tests
- See the usage example below for a quick start
137+
### Usage

```bash
pip install "toon-python[langchain]"
```

```python
from toon_format.langchain import ToonSerializer
```
123143## Development
124144
125145``` bash
Original file line number Diff line number Diff line change @@ -95,3 +95,11 @@ build-backend = "hatchling.build"
9595
9696[tool .hatch .build .targets .wheel ]
9797packages = [" src/toon_format" ]
98+
# NOTE: this project builds with hatchling (see build-backend above), which
# ignores [tool.poetry.*] tables — declare the extra and dev dependencies via
# PEP 621 / PEP 735 so `pip install "toon-python[langchain]"` actually works.
[project.optional-dependencies]
langchain = ["langchain-core"]

[dependency-groups]
dev = ["langchain-core", "langchain-openai", "tiktoken"]
Original file line number Diff line number Diff line change 1+ from .serializer import ToonSerializer , ToonOutputParser
2+
3+ __all__ = ["ToonSerializer" , "ToonOutputParser" ]
Original file line number Diff line number Diff line change 1+ from __future__ import annotations
2+
3+ from typing import Any , Sequence
4+
5+ from langchain_core .documents import Document
6+ from langchain_core .output_parsers import BaseOutputParser
7+
8+ from .. import encode , decode
9+
10+
class ToonSerializer:
    """Serialize LangChain ``Document`` objects into TOON format.

    Each document's ``page_content`` is run through :func:`toon_format.encode`
    and a ``"format": "toon"`` marker is added to its metadata.

    NOTE(review): ``page_content`` is already a ``str``; whether encoding a
    raw string yields the advertised 30-60% token reduction depends on
    ``encode``'s handling of scalar input — confirm against the encoder.
    """

    def transform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> list[Document]:
        """Return new Documents with TOON-encoded content and tagged metadata.

        The input documents are not mutated; a fresh ``Document`` is built
        for each one. Extra ``kwargs`` are accepted for interface parity and
        ignored.
        """
        converted: list[Document] = []
        for source in documents:
            # Copy the metadata, then stamp the format marker (overriding
            # any pre-existing "format" key, as the original did).
            tagged_meta = dict(source.metadata)
            tagged_meta["format"] = "toon"
            converted.append(
                Document(
                    page_content=encode(source.page_content),
                    metadata=tagged_meta,
                )
            )
        return converted

    async def atransform_documents(
        self, documents: Sequence[Document], **kwargs: Any
    ) -> list[Document]:
        """Async variant; the work is pure CPU, so it delegates to the sync path."""
        return self.transform_documents(documents, **kwargs)
29+
30+
class ToonOutputParser(BaseOutputParser):
    """LangChain output parser that decodes TOON-formatted LLM responses."""

    def parse(self, text: str) -> Any:
        """Decode *text* (after stripping surrounding whitespace) from TOON."""
        cleaned = text.strip()
        return decode(cleaned)

    @property
    def _type(self) -> str:
        # Identifier used by LangChain's serialization/registry machinery.
        return "toon"
Original file line number Diff line number Diff line change 1+ from toon_format .langchain import ToonSerializer , ToonOutputParser
2+ from langchain_core .documents import Document
3+
4+
def test_serializer():
    """transform_documents must return TOON-tagged string documents.

    Fix: ``langchain_core.documents.Document`` validates ``page_content``
    as a ``str`` (pydantic), so the original dict argument would raise a
    ValidationError before the serializer was ever exercised. Structured
    data is therefore passed as a serialized string, and the assertions
    pin the serializer's own guarantees (string content, format marker)
    rather than a specific TOON rendering.
    """
    docs = [Document(page_content='{"name": "Ak", "skill": "Noob"}')]

    result = ToonSerializer().transform_documents(docs)

    assert isinstance(result[0].page_content, str)
    # The serializer always stamps this marker, regardless of how
    # `encode` renders the payload.
    assert result[0].metadata["format"] == "toon"
9+
10+
def test_parser():
    """A simple two-field TOON payload decodes back to a mapping."""
    payload = "name:Ak\nage:22"
    parsed = ToonOutputParser().parse(payload)
    assert parsed["name"] == "Ak"
You can’t perform that action at this time.
0 commit comments