99
1010import json
1111import os
12+ import sqlite3
1213import sys
14+ import tempfile
1315import unittest
1416
1517from hypothesis import given , settings
2022 sys .path .insert (0 , REPO_ROOT )
2123
2224from models import Bubble , SchemaError
23- from utils .cli_chat_reader import _extract_blob_refs , messages_to_bubbles
25+ from utils .cli_chat_reader import (
26+ classify_blob_data ,
27+ messages_to_bubbles ,
28+ traverse_blobs ,
29+ _extract_blob_refs ,
30+ )
2431from utils .text_extract import extract_text_from_bubble
2532
2633# Bounded strategies: fast enough for CI (<30s total with default example counts).
3946 max_size = 12 ,
4047)
4148
# Adversarial ``raw`` payloads: the regular ``_BUBBLE_RAW`` strategy plus values
# of other top-level types (None, integers, short lists of text, plain text).
_BUBBLE_RAW_ANY = st.one_of(
    _BUBBLE_RAW,
    st.none(),
    st.integers(),
    st.lists(st.text(max_size=40), max_size=5),
    st.text(max_size=200),
)

# Bubble ids: 1-80 characters, excluding surrogates (category "Cs") and the
# characters listed in ``blacklist_characters``.
_BUBBLE_ID = st.text(
    alphabet=st.characters(
        blacklist_categories=("Cs",),
        blacklist_characters="\x00 ",
    ),
    min_size=1,
    max_size=80,
)

# Adversarial bubble ids: the regular ids plus the empty string, None, small
# non-negative integers and short byte strings.
_BUBBLE_ID_ANY = st.one_of(
    _BUBBLE_ID,
    st.just(""),
    st.none(),
    st.integers(min_value=0, max_value=9999),
    st.binary(min_size=0, max_size=8),
)

# Blob ids: exactly 64 lowercase hexadecimal characters.
_BLOB_ID_HEX = st.text(
    alphabet="abcdef0123456789",
    min_size=64,
    max_size=64,
)
76+
77+
4878@st .composite
4979def _cli_message (draw ) -> dict :
80+ # Empty role is intentional adversarial input (unknown / missing role).
5081 role = draw (st .sampled_from (["user" , "assistant" , "system" , "tool" , "" ]))
5182 content = draw (
5283 st .one_of (
@@ -66,6 +97,7 @@ def _cli_message(draw) -> dict:
6697 )
6798 return {"role" : role , "content" : content }
6899
100+
69101_BUBBLE_LIKE = st .dictionaries (
70102 st .sampled_from (["text" , "richText" , "codeBlocks" , "type" , "metadata" ]),
71103 st .one_of (
@@ -84,42 +116,101 @@ def _cli_message(draw) -> dict:
84116 max_size = 6 ,
85117)
86118
# Candidate KV values fed to ``_assemble_workspace_bubble``: None, an
# already-decoded ``_BUBBLE_RAW`` payload, arbitrary text, arbitrary bytes,
# or an integer.
_KV_VALUE = st.one_of(
    st.none(),
    _BUBBLE_RAW,
    st.text(max_size=400),
    st.binary(max_size=256),
    st.integers(),
)
126+
127+
128+ def _make_meta_value (meta : dict ) -> str :
129+ return json .dumps (meta ).encode ("utf-8" ).hex ()
130+
87131
88- def _classify_blob_bytes (data : bytes ) -> None :
89- """Mirror traverse_blobs blob classification without SQLite."""
def _build_store_db_raw(path: str, meta: dict, blobs: dict[str, bytes]) -> None:
    """Minimal store.db with arbitrary blob payloads (for traverse_blobs fuzz).

    Creates a SQLite database at *path* with two tables:

    * ``meta(key, value)`` -- a single row with key ``'0'`` whose value is
      *meta* encoded by ``_make_meta_value``.
    * ``blobs(id, data)`` -- one row per entry of *blobs*.

    Args:
        path: Filesystem location of the database file to create.
        meta: JSON-serializable metadata stored in the ``meta`` table.
        blobs: Mapping of blob id to raw payload bytes.

    Raises:
        sqlite3.Error: If any statement fails (for example when the tables
            already exist at *path*).
    """
    conn = sqlite3.connect(path)
    # try/finally guarantees the connection is released even when a statement
    # raises, so a failing example never leaks an open handle on the temp file.
    try:
        conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT)")
        conn.execute("CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB)")
        conn.execute("INSERT INTO meta VALUES ('0', ?)", (_make_meta_value(meta),))
        # dict.items() yields (id, data) tuples, matching the two placeholders.
        conn.executemany("INSERT INTO blobs VALUES (?, ?)", blobs.items())
        conn.commit()
    finally:
        conn.close()
142+
143+
def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
    """Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict).

    *value* is decoded first: bytes/bytearray are UTF-8 decoded and parsed as
    JSON, str is parsed as JSON, anything else is used as-is. The decoded
    payload is then handed to ``Bubble.from_dict``.

    Returns the bubble's ``raw`` attribute when *bubble_id* is a ``str`` and
    parsing succeeds. Returns ``None`` when *value* is ``None``, when decoding
    fails, when ``Bubble.from_dict`` raises ``SchemaError``, or when
    *bubble_id* is not a ``str`` (``from_dict`` is still called in that case
    so unexpected exceptions surface). Any other exception propagates.
    """
    if value is None:
        return None

    try:
        if isinstance(value, (bytes, bytearray)):
            parsed = json.loads(bytes(value).decode("utf-8"))
        elif isinstance(value, str):
            parsed = json.loads(value)
        else:
            parsed = value
    except (TypeError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        return None

    try:
        bubble = Bubble.from_dict(parsed, bubble_id=bubble_id)  # type: ignore[arg-type]
        return bubble.raw if isinstance(bubble_id, str) else None
    except SchemaError:
        return None
164+
165+
def _parse_bubble_from_dict(raw: object, bubble_id: object) -> Bubble | None:
    """Run ``Bubble.from_dict`` and map ``SchemaError`` to ``None``.

    Only ``SchemaError`` is swallowed; every other exception raised by
    ``Bubble.from_dict`` is left to propagate to the caller.
    """
    try:
        parsed = Bubble.from_dict(raw, bubble_id=bubble_id)  # type: ignore[arg-type]
    except SchemaError:
        return None
    else:
        return parsed
97172
98173
class TestBubbleFromDictFuzz(unittest.TestCase):
    """Property tests for ``Bubble.from_dict`` via ``_parse_bubble_from_dict``."""

    @given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
    @settings(max_examples=80, deadline=None)
    def test_never_raises_unhandled(self, raw: dict, bubble_id: str) -> None:
        """A successful parse keeps the given id and the very same raw object."""
        parsed = _parse_bubble_from_dict(raw, bubble_id)
        if parsed is not None:
            self.assertEqual(parsed.bubble_id, bubble_id)
            self.assertIs(parsed.raw, raw)

    @given(raw=_BUBBLE_RAW_ANY, bubble_id=_BUBBLE_ID_ANY)
    @settings(max_examples=80, deadline=None)
    def test_adversarial_inputs_only_schema_error_or_success(
        self, raw: object, bubble_id: object
    ) -> None:
        """Wrongly typed inputs either parse or are rejected with SchemaError."""
        try:
            _parse_bubble_from_dict(raw, bubble_id)
        except Exception as exc:
            self.fail(f"unexpected {type(exc).__name__} : {exc} ")

    @given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
    @settings(max_examples=80, deadline=None)
    def test_parsing_is_idempotent(self, raw: dict, bubble_id: str) -> None:
        """Parsing the same input twice yields equal results."""
        first, second = (_parse_bubble_from_dict(raw, bubble_id) for _ in range(2))
        self.assertEqual(first, second)
200+
201+
class TestWorkspaceTabsAssemblyFuzz(unittest.TestCase):
    """Property tests for the ``_assemble_workspace_bubble`` helper."""

    @given(bubble_id=_BUBBLE_ID_ANY, value=_KV_VALUE)
    @settings(max_examples=100, deadline=None)
    def test_assemble_workspace_bubble_never_raises(
        self, bubble_id: object, value: object
    ) -> None:
        """Any id/value pair yields either ``None`` or a dict, never an exception."""
        try:
            assembled = _assemble_workspace_bubble(bubble_id, value)
        except Exception as exc:
            self.fail(f"unexpected {type(exc).__name__} : {exc} ")
        if assembled is None:
            return
        self.assertIsInstance(assembled, dict)
123214
124215
125216class TestBlobChainParsingFuzz (unittest .TestCase ):
@@ -142,21 +233,50 @@ def test_extract_blob_refs_is_idempotent(self, data: bytes) -> None:
142233
143234 @given (data = st .binary (max_size = 4096 ))
144235 @settings (max_examples = 80 , deadline = None )
145- def test_blob_classification_never_raises (self , data : bytes ) -> None :
236+ def test_classify_blob_data_never_raises (self , data : bytes ) -> None :
146237 try :
147- _classify_blob_bytes (data )
238+ msg , refs = classify_blob_data (data )
148239 except Exception as exc :
149240 self .fail (f"unexpected { type (exc ).__name__ } : { exc } " )
241+ if msg is not None :
242+ self .assertIsInstance (msg , dict )
243+ self .assertEqual (refs , [])
244+ else :
245+ self .assertIsInstance (refs , list )
246+
247+ @given (
248+ root_id = _BLOB_ID_HEX ,
249+ extra_ids = st .lists (_BLOB_ID_HEX , max_size = 6 , unique = True ),
250+ payloads = st .lists (st .binary (max_size = 1024 ), min_size = 1 , max_size = 8 ),
251+ )
252+ @settings (max_examples = 40 , deadline = None )
253+ def test_traverse_blobs_never_raises (
254+ self , root_id : str , extra_ids : list [str ], payloads : list [bytes ]
255+ ) -> None :
256+ meta = {"latestRootBlobId" : root_id , "createdAt" : 1_700_000_000_000 }
257+ blobs : dict [str , bytes ] = {root_id : payloads [0 ]}
258+ for i , bid in enumerate (extra_ids ):
259+ if bid not in blobs :
260+ blobs [bid ] = payloads [(i + 1 ) % len (payloads )]
261+ with tempfile .TemporaryDirectory () as td :
262+ db_path = os .path .join (td , "store.db" )
263+ _build_store_db_raw (db_path , meta , blobs )
264+ try :
265+ messages = traverse_blobs (db_path )
266+ except Exception as exc :
267+ self .fail (f"traverse_blobs raised { type (exc ).__name__ } : { exc } " )
268+ self .assertIsInstance (messages , list )
150269
151270
152271class TestTextExtractionFuzz (unittest .TestCase ):
153272 @given (bubble = _BUBBLE_LIKE )
154273 @settings (max_examples = 100 , deadline = None )
155274 def test_extract_text_from_bubble_never_raises (self , bubble : dict ) -> None :
156275 try :
157- extract_text_from_bubble (bubble )
276+ text = extract_text_from_bubble (bubble )
158277 except Exception as exc :
159278 self .fail (f"unexpected { type (exc ).__name__ } : { exc } " )
279+ self .assertIsInstance (text , str )
160280
161281 @given (bubble = _BUBBLE_LIKE )
162282 @settings (max_examples = 80 , deadline = None )
@@ -181,9 +301,23 @@ def test_messages_to_bubbles_then_extract_never_raises(
181301 self .assertIsInstance (bubbles , list )
182302 for bubble in bubbles :
183303 try :
184- extract_text_from_bubble (bubble )
304+ text = extract_text_from_bubble (bubble )
185305 except Exception as exc :
186306 self .fail (f"extract_text_from_bubble raised { type (exc ).__name__ } : { exc } " )
307+ self .assertIsInstance (text , str )
308+
309+ @given (
310+ messages = st .lists (_cli_message (), max_size = 12 ),
311+ created_at = st .integers (min_value = 0 , max_value = 2_000_000_000_000 ),
312+ )
313+ @settings (max_examples = 80 , deadline = None )
314+ def test_messages_to_bubbles_is_idempotent (
315+ self , messages : list [dict ], created_at : int
316+ ) -> None :
317+ self .assertEqual (
318+ messages_to_bubbles (messages , created_at ),
319+ messages_to_bubbles (messages , created_at ),
320+ )
187321
188322
189323if __name__ == "__main__" :
0 commit comments