Skip to content

Commit a4ab1af

Browse files
committed
improved for issues
1 parent 665b70b commit a4ab1af

6 files changed

Lines changed: 190 additions & 35 deletions

File tree

.github/workflows/tests.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ jobs:
105105
run: |
106106
python -m pip install --upgrade pip
107107
python -m pip install -r requirements-lock.txt
108+
# Dev-only (not in requirements-lock.txt): pytest + hypothesis for unittest/property tests.
108109
python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
109110
110111
- name: Run unittest suite

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,4 @@ Thumbs.db
4343
.coverage
4444
htmlcov/
4545
coverage.xml
46+
.hypothesis/

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ source venv/bin/activate
6161
pip install -r requirements.txt
6262
```
6363

64+
For development (pytest, mypy, Hypothesis property tests):
65+
66+
```bash
67+
pip install -e ".[dev]"
68+
```
69+
6470
For reproducible installs (same versions as CI), use the pinned lock file:
6571

6672
```bash

tests/test_blob_parsing_fuzz.py

Lines changed: 157 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99

1010
import json
1111
import os
12+
import sqlite3
1213
import sys
14+
import tempfile
1315
import unittest
1416

1517
from hypothesis import given, settings
@@ -20,7 +22,12 @@
2022
sys.path.insert(0, REPO_ROOT)
2123

2224
from models import Bubble, SchemaError
23-
from utils.cli_chat_reader import _extract_blob_refs, messages_to_bubbles
25+
from utils.cli_chat_reader import (
26+
classify_blob_data,
27+
messages_to_bubbles,
28+
traverse_blobs,
29+
_extract_blob_refs,
30+
)
2431
from utils.text_extract import extract_text_from_bubble
2532

2633
# Bounded strategies: fast enough for CI (<30s total with default example counts).
@@ -39,14 +46,38 @@
3946
max_size=12,
4047
)
4148

49+
_BUBBLE_RAW_ANY = st.one_of(
50+
_BUBBLE_RAW,
51+
st.none(),
52+
st.integers(),
53+
st.lists(st.text(max_size=40), max_size=5),
54+
st.text(max_size=200),
55+
)
56+
4257
_BUBBLE_ID = st.text(
4358
alphabet=st.characters(blacklist_categories=("Cs",), blacklist_characters="\x00"),
4459
min_size=1,
4560
max_size=80,
4661
)
4762

63+
_BUBBLE_ID_ANY = st.one_of(
64+
_BUBBLE_ID,
65+
st.just(""),
66+
st.none(),
67+
st.integers(min_value=0, max_value=9999),
68+
st.binary(min_size=0, max_size=8),
69+
)
70+
71+
_BLOB_ID_HEX = st.text(
72+
alphabet="abcdef0123456789",
73+
min_size=64,
74+
max_size=64,
75+
)
76+
77+
4878
@st.composite
4979
def _cli_message(draw) -> dict:
80+
# Empty role is intentional adversarial input (unknown / missing role).
5081
role = draw(st.sampled_from(["user", "assistant", "system", "tool", ""]))
5182
content = draw(
5283
st.one_of(
@@ -66,6 +97,7 @@ def _cli_message(draw) -> dict:
6697
)
6798
return {"role": role, "content": content}
6899

100+
69101
_BUBBLE_LIKE = st.dictionaries(
70102
st.sampled_from(["text", "richText", "codeBlocks", "type", "metadata"]),
71103
st.one_of(
@@ -84,42 +116,101 @@ def _cli_message(draw) -> dict:
84116
max_size=6,
85117
)
86118

119+
_KV_VALUE = st.one_of(
120+
st.none(),
121+
_BUBBLE_RAW,
122+
st.text(max_size=400),
123+
st.binary(max_size=256),
124+
st.integers(),
125+
)
126+
127+
128+
def _make_meta_value(meta: dict) -> str:
129+
return json.dumps(meta).encode("utf-8").hex()
130+
87131

88-
def _classify_blob_bytes(data: bytes) -> None:
89-
"""Mirror traverse_blobs blob classification without SQLite."""
132+
def _build_store_db_raw(path: str, meta: dict, blobs: dict[str, bytes]) -> None:
    """Create a minimal ``store.db`` with arbitrary blob payloads (for traverse_blobs fuzz).

    The database gets two tables:

    * ``meta``  - a single row keyed ``'0'`` whose value is
      ``_make_meta_value(meta)``.
    * ``blobs`` - one row per ``(blob_id, data)`` entry of *blobs*.

    Args:
        path: Filesystem path of the SQLite file to create.
        meta: Metadata dict stored in the single ``meta`` row.
        blobs: Mapping of blob id to raw payload bytes.

    Raises:
        sqlite3.Error: If the schema cannot be created or a row cannot be
            inserted (for example when *path* already holds these tables).
    """
    conn = sqlite3.connect(path)
    try:
        conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT)")
        conn.execute("CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB)")
        conn.execute("INSERT INTO meta VALUES ('0', ?)", (_make_meta_value(meta),))
        conn.executemany("INSERT INTO blobs VALUES (?, ?)", blobs.items())
        conn.commit()
    finally:
        # Always release the file handle, even when an insert fails: a leaked
        # connection can keep the temp directory from being removed on
        # platforms that lock open files.
        conn.close()
142+
143+
144+
def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
145+
"""Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict)."""
90146
try:
91-
msg = json.loads(data.decode("utf-8"))
92-
if isinstance(msg, dict) and "role" in msg:
93-
return
94-
except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
95-
pass
96-
_extract_blob_refs(data)
147+
if value is None:
148+
return None
149+
if isinstance(value, (bytes, bytearray)):
150+
parsed = json.loads(bytes(value).decode("utf-8"))
151+
elif isinstance(value, str):
152+
parsed = json.loads(value)
153+
else:
154+
parsed = value
155+
except (json.JSONDecodeError, TypeError, ValueError, UnicodeDecodeError):
156+
return None
157+
try:
158+
if not isinstance(bubble_id, str):
159+
Bubble.from_dict(parsed, bubble_id=bubble_id) # type: ignore[arg-type]
160+
return None
161+
return Bubble.from_dict(parsed, bubble_id=bubble_id).raw
162+
except SchemaError:
163+
return None
164+
165+
166+
def _parse_bubble_from_dict(raw: object, bubble_id: object) -> Bubble | None:
    """Run ``Bubble.from_dict`` and translate ``SchemaError`` into ``None``.

    Only ``SchemaError`` is swallowed; any other exception raised by
    ``Bubble.from_dict`` propagates to the caller, which lets the fuzz tests
    report it as an unexpected failure.
    """
    try:
        parsed = Bubble.from_dict(raw, bubble_id=bubble_id)  # type: ignore[arg-type]
    except SchemaError:
        return None
    return parsed
97172

98173

99174
class TestBubbleFromDictFuzz(unittest.TestCase):
100175
@given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
101176
@settings(max_examples=80, deadline=None)
102177
def test_never_raises_unhandled(self, raw: dict, bubble_id: str) -> None:
103-
try:
104-
bubble = Bubble.from_dict(raw, bubble_id=bubble_id)
105-
except SchemaError:
178+
bubble = _parse_bubble_from_dict(raw, bubble_id)
179+
if bubble is None:
106180
return
107-
except Exception as exc:
108-
self.fail(f"unexpected {type(exc).__name__}: {exc}")
109181
self.assertEqual(bubble.bubble_id, bubble_id)
110182
self.assertIs(bubble.raw, raw)
111183

184+
@given(raw=_BUBBLE_RAW_ANY, bubble_id=_BUBBLE_ID_ANY)
185+
@settings(max_examples=80, deadline=None)
186+
def test_adversarial_inputs_only_schema_error_or_success(
187+
self, raw: object, bubble_id: object
188+
) -> None:
189+
try:
190+
_parse_bubble_from_dict(raw, bubble_id)
191+
except Exception as exc:
192+
self.fail(f"unexpected {type(exc).__name__}: {exc}")
193+
112194
@given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
113195
@settings(max_examples=80, deadline=None)
114196
def test_parsing_is_idempotent(self, raw: dict, bubble_id: str) -> None:
197+
first = _parse_bubble_from_dict(raw, bubble_id)
198+
second = _parse_bubble_from_dict(raw, bubble_id)
199+
self.assertEqual(first, second)
200+
201+
202+
class TestWorkspaceTabsAssemblyFuzz(unittest.TestCase):
203+
@given(bubble_id=_BUBBLE_ID_ANY, value=_KV_VALUE)
204+
@settings(max_examples=100, deadline=None)
205+
def test_assemble_workspace_bubble_never_raises(
206+
self, bubble_id: object, value: object
207+
) -> None:
115208
try:
116-
first = Bubble.from_dict(raw, bubble_id=bubble_id)
117-
second = Bubble.from_dict(raw, bubble_id=bubble_id)
118-
except SchemaError:
119-
return
209+
result = _assemble_workspace_bubble(bubble_id, value)
120210
except Exception as exc:
121211
self.fail(f"unexpected {type(exc).__name__}: {exc}")
122-
self.assertEqual(first, second)
212+
if result is not None:
213+
self.assertIsInstance(result, dict)
123214

124215

125216
class TestBlobChainParsingFuzz(unittest.TestCase):
@@ -142,21 +233,50 @@ def test_extract_blob_refs_is_idempotent(self, data: bytes) -> None:
142233

143234
@given(data=st.binary(max_size=4096))
144235
@settings(max_examples=80, deadline=None)
145-
def test_blob_classification_never_raises(self, data: bytes) -> None:
236+
def test_classify_blob_data_never_raises(self, data: bytes) -> None:
146237
try:
147-
_classify_blob_bytes(data)
238+
msg, refs = classify_blob_data(data)
148239
except Exception as exc:
149240
self.fail(f"unexpected {type(exc).__name__}: {exc}")
241+
if msg is not None:
242+
self.assertIsInstance(msg, dict)
243+
self.assertEqual(refs, [])
244+
else:
245+
self.assertIsInstance(refs, list)
246+
247+
@given(
248+
root_id=_BLOB_ID_HEX,
249+
extra_ids=st.lists(_BLOB_ID_HEX, max_size=6, unique=True),
250+
payloads=st.lists(st.binary(max_size=1024), min_size=1, max_size=8),
251+
)
252+
@settings(max_examples=40, deadline=None)
253+
def test_traverse_blobs_never_raises(
254+
self, root_id: str, extra_ids: list[str], payloads: list[bytes]
255+
) -> None:
256+
meta = {"latestRootBlobId": root_id, "createdAt": 1_700_000_000_000}
257+
blobs: dict[str, bytes] = {root_id: payloads[0]}
258+
for i, bid in enumerate(extra_ids):
259+
if bid not in blobs:
260+
blobs[bid] = payloads[(i + 1) % len(payloads)]
261+
with tempfile.TemporaryDirectory() as td:
262+
db_path = os.path.join(td, "store.db")
263+
_build_store_db_raw(db_path, meta, blobs)
264+
try:
265+
messages = traverse_blobs(db_path)
266+
except Exception as exc:
267+
self.fail(f"traverse_blobs raised {type(exc).__name__}: {exc}")
268+
self.assertIsInstance(messages, list)
150269

151270

152271
class TestTextExtractionFuzz(unittest.TestCase):
153272
@given(bubble=_BUBBLE_LIKE)
154273
@settings(max_examples=100, deadline=None)
155274
def test_extract_text_from_bubble_never_raises(self, bubble: dict) -> None:
156275
try:
157-
extract_text_from_bubble(bubble)
276+
text = extract_text_from_bubble(bubble)
158277
except Exception as exc:
159278
self.fail(f"unexpected {type(exc).__name__}: {exc}")
279+
self.assertIsInstance(text, str)
160280

161281
@given(bubble=_BUBBLE_LIKE)
162282
@settings(max_examples=80, deadline=None)
@@ -181,9 +301,23 @@ def test_messages_to_bubbles_then_extract_never_raises(
181301
self.assertIsInstance(bubbles, list)
182302
for bubble in bubbles:
183303
try:
184-
extract_text_from_bubble(bubble)
304+
text = extract_text_from_bubble(bubble)
185305
except Exception as exc:
186306
self.fail(f"extract_text_from_bubble raised {type(exc).__name__}: {exc}")
307+
self.assertIsInstance(text, str)
308+
309+
@given(
310+
messages=st.lists(_cli_message(), max_size=12),
311+
created_at=st.integers(min_value=0, max_value=2_000_000_000_000),
312+
)
313+
@settings(max_examples=80, deadline=None)
314+
def test_messages_to_bubbles_is_idempotent(
315+
self, messages: list[dict], created_at: int
316+
) -> None:
317+
self.assertEqual(
318+
messages_to_bubbles(messages, created_at),
319+
messages_to_bubbles(messages, created_at),
320+
)
187321

188322

189323
if __name__ == "__main__":

utils/cli_chat_reader.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,23 @@ def _extract_blob_refs(data: bytes) -> list[str]:
7979
return refs
8080

8181

82+
def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]:
83+
"""Classify a blob payload as a JSON message or a binary chain node.
84+
85+
Returns ``(message_dict, [])`` when *data* decodes to a dict with a
86+
``role`` field; otherwise ``(None, refs)`` where *refs* are SHA-256 hex
87+
ids from :func:`_extract_blob_refs`. Used by :func:`traverse_blobs` and
88+
property tests — keep in sync when the load loop changes.
89+
"""
90+
try:
91+
msg = json.loads(data.decode("utf-8"))
92+
if isinstance(msg, dict) and "role" in msg:
93+
return msg, []
94+
except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
95+
pass
96+
return None, _extract_blob_refs(data)
97+
98+
8299
def traverse_blobs(db_path: str) -> list[dict]:
83100
"""Reconstruct the conversation from a ``store.db`` blob graph.
84101
@@ -118,15 +135,11 @@ def traverse_blobs(db_path: str) -> list[dict]:
118135
for blob_id, data in conn.execute("SELECT id, data FROM blobs"):
119136
if not isinstance(data, bytes):
120137
continue
121-
try:
122-
msg = json.loads(data.decode("utf-8"))
123-
if isinstance(msg, dict) and "role" in msg:
124-
json_blobs[blob_id] = msg
125-
continue
126-
except (UnicodeDecodeError, json.JSONDecodeError):
127-
pass
128-
refs = _extract_blob_refs(data)
129-
chain_blobs[blob_id] = refs
138+
msg, refs = classify_blob_data(data)
139+
if msg is not None:
140+
json_blobs[blob_id] = msg
141+
else:
142+
chain_blobs[blob_id] = refs
130143

131144
# BFS from root (newest-first by nature of the linked-list structure);
132145
# reverse at the end to restore chronological (oldest→newest) order.

utils/text_extract.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ def extract_text_from_bubble(bubble: dict) -> str:
2828

2929
text = ""
3030

31-
# Try text field first
31+
# Try text field first (coerce non-str values — Cursor payloads can drift)
3232
if bubble.get("text") and str(bubble["text"]).strip():
33-
text = bubble["text"]
33+
text = str(bubble["text"])
3434

3535
# Fall back to richText
3636
if not text and bubble.get("richText"):
@@ -49,7 +49,7 @@ def extract_text_from_bubble(bubble: dict) -> str:
4949
lang = cb.get("language", "")
5050
text += f"\n\n```{lang}\n{cb['content']}\n```"
5151

52-
return text
52+
return text if isinstance(text, str) else ""
5353

5454

5555
def slug(s: str) -> str:

0 commit comments

Comments
 (0)