From 665b70b5f396029510f62e5faa6c6a65d350f0e5 Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 09:59:46 -0400
Subject: [PATCH 1/9] Add property-based fuzz tests for blob/bubble parsing

---
 .github/workflows/tests.yml     |   2 +-
 pyproject.toml                  |   1 +
 tests/test_blob_parsing_fuzz.py | 190 ++++++++++++++++++++++++++++++++
 3 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_blob_parsing_fuzz.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b040985..2d1ae62 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -105,7 +105,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements-lock.txt
-          python -m pip install 'pytest>=8,<9'
+          python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
 
       - name: Run unittest suite
         run: python -m unittest discover tests -v
diff --git a/pyproject.toml b/pyproject.toml
index 2c4226b..ea79f67 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,7 @@ desktop = ["pywebview>=5.0,<6"]
 dev = [
     "pytest>=8,<9",
     "mypy>=1.10,<2",
+    "hypothesis>=6.100,<7",
 ]
 
 [project.scripts]
diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
new file mode 100644
index 0000000..8df056c
--- /dev/null
+++ b/tests/test_blob_parsing_fuzz.py
@@ -0,0 +1,190 @@
+"""Property-based fuzz tests for blob / bubble parsing (issue #71).
+
+Run:
+  python -m unittest tests.test_blob_parsing_fuzz -v
+  python -m pytest tests/test_blob_parsing_fuzz.py -v
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import unittest
+
+from hypothesis import given, settings
+from hypothesis import strategies as st
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if REPO_ROOT not in sys.path:
+    sys.path.insert(0, REPO_ROOT)
+
+from models import Bubble, SchemaError
+from utils.cli_chat_reader import _extract_blob_refs, messages_to_bubbles
+from utils.text_extract import extract_text_from_bubble
+
+# Bounded strategies: fast enough for CI (<30s total with default example counts).
+_JSON_VALUES = st.one_of(
+    st.none(),
+    st.booleans(),
+    st.integers(),
+    st.floats(allow_nan=False, allow_infinity=False),
+    st.text(max_size=200),
+    st.lists(st.text(max_size=80), max_size=8),
+)
+
+_BUBBLE_RAW = st.dictionaries(
+    st.text(min_size=0, max_size=40),
+    _JSON_VALUES,
+    max_size=12,
+)
+
+_BUBBLE_ID = st.text(
+    alphabet=st.characters(blacklist_categories=("Cs",), blacklist_characters="\x00"),
+    min_size=1,
+    max_size=80,
+)
+
+@st.composite
+def _cli_message(draw) -> dict:
+    role = draw(st.sampled_from(["user", "assistant", "system", "tool", ""]))
+    content = draw(
+        st.one_of(
+            st.text(max_size=500),
+            st.lists(
+                st.dictionaries(
+                    st.sampled_from(
+                        ["type", "text", "toolName", "args", "toolCallId", "result"]
+                    ),
+                    st.one_of(st.text(max_size=120), st.integers(), st.none()),
+                    max_size=6,
+                ),
+                max_size=8,
+            ),
+            st.none(),
+        )
+    )
+    return {"role": role, "content": content}
+
+_BUBBLE_LIKE = st.dictionaries(
+    st.sampled_from(["text", "richText", "codeBlocks", "type", "metadata"]),
+    st.one_of(
+        st.text(max_size=300),
+        st.none(),
+        st.lists(
+            st.dictionaries(
+                st.text(max_size=20),
+                st.one_of(st.text(max_size=100), st.integers()),
+                max_size=5,
+            ),
+            max_size=4,
+        ),
+        st.dictionaries(st.text(max_size=20), _JSON_VALUES, max_size=5),
+    ),
+    max_size=6,
+)
+
+
+def _classify_blob_bytes(data: bytes) -> None:
+    """Mirror traverse_blobs blob classification without SQLite."""
+    try:
+        msg = json.loads(data.decode("utf-8"))
+        if isinstance(msg, dict) and "role" in msg:
+            return
+    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
+        pass
+    _extract_blob_refs(data)
+
+
+class TestBubbleFromDictFuzz(unittest.TestCase):
+    @given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
+    @settings(max_examples=80, deadline=None)
+    def test_never_raises_unhandled(self, raw: dict, bubble_id: str) -> None:
+        try:
+            bubble = Bubble.from_dict(raw, bubble_id=bubble_id)
+        except SchemaError:
+            return
+        except Exception as exc:
+            self.fail(f"unexpected {type(exc).__name__}: {exc}")
+        self.assertEqual(bubble.bubble_id, bubble_id)
+        self.assertIs(bubble.raw, raw)
+
+    @given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
+    @settings(max_examples=80, deadline=None)
+    def test_parsing_is_idempotent(self, raw: dict, bubble_id: str) -> None:
+        try:
+            first = Bubble.from_dict(raw, bubble_id=bubble_id)
+            second = Bubble.from_dict(raw, bubble_id=bubble_id)
+        except SchemaError:
+            return
+        except Exception as exc:
+            self.fail(f"unexpected {type(exc).__name__}: {exc}")
+        self.assertEqual(first, second)
+
+
+class TestBlobChainParsingFuzz(unittest.TestCase):
+    @given(data=st.binary(max_size=4096))
+    @settings(max_examples=120, deadline=None)
+    def test_extract_blob_refs_never_raises(self, data: bytes) -> None:
+        try:
+            refs = _extract_blob_refs(data)
+        except Exception as exc:
+            self.fail(f"unexpected {type(exc).__name__}: {exc}")
+        self.assertIsInstance(refs, list)
+        for ref in refs:
+            self.assertIsInstance(ref, str)
+            self.assertEqual(len(ref), 64)
+
+    @given(data=st.binary(max_size=4096))
+    @settings(max_examples=80, deadline=None)
+    def test_extract_blob_refs_is_idempotent(self, data: bytes) -> None:
+        self.assertEqual(_extract_blob_refs(data), _extract_blob_refs(data))
+
+    @given(data=st.binary(max_size=4096))
+    @settings(max_examples=80, deadline=None)
+    def test_blob_classification_never_raises(self, data: bytes) -> None:
+        try:
+            _classify_blob_bytes(data)
+        except Exception as exc:
+            self.fail(f"unexpected {type(exc).__name__}: {exc}")
+
+
+class TestTextExtractionFuzz(unittest.TestCase):
+    @given(bubble=_BUBBLE_LIKE)
+    @settings(max_examples=100, deadline=None)
+    def test_extract_text_from_bubble_never_raises(self, bubble: dict) -> None:
+        try:
+            extract_text_from_bubble(bubble)
+        except Exception as exc:
+            self.fail(f"unexpected {type(exc).__name__}: {exc}")
+
+    @given(bubble=_BUBBLE_LIKE)
+    @settings(max_examples=80, deadline=None)
+    def test_extract_text_is_idempotent(self, bubble: dict) -> None:
+        self.assertEqual(
+            extract_text_from_bubble(bubble),
+            extract_text_from_bubble(bubble),
+        )
+
+    @given(
+        messages=st.lists(_cli_message(), max_size=12),
+        created_at=st.integers(min_value=0, max_value=2_000_000_000_000),
+    )
+    @settings(max_examples=80, deadline=None)
+    def test_messages_to_bubbles_then_extract_never_raises(
+        self, messages: list[dict], created_at: int
+    ) -> None:
+        try:
+            bubbles = messages_to_bubbles(messages, created_at)
+        except Exception as exc:
+            self.fail(f"messages_to_bubbles raised {type(exc).__name__}: {exc}")
+        self.assertIsInstance(bubbles, list)
+        for bubble in bubbles:
+            try:
+                extract_text_from_bubble(bubble)
+            except Exception as exc:
+                self.fail(f"extract_text_from_bubble raised {type(exc).__name__}: {exc}")
+
+
+if __name__ == "__main__":
+    unittest.main()

From a4ab1aff08544c685b194b63aff65e01afd440db Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 10:14:32 -0400
Subject: [PATCH 2/9] Extend fuzz tests; add classify_blob_data helper

---
 .github/workflows/tests.yml     |   1 +
 .gitignore                      |   1 +
 README.md                       |   6 ++
 tests/test_blob_parsing_fuzz.py | 180 ++++++++++++++++++++++++++++----
 utils/cli_chat_reader.py        |  31 ++++--
 utils/text_extract.py           |   6 +-
 6 files changed, 190 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2d1ae62..2342a44 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -105,6 +105,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements-lock.txt
+          # Dev-only (not in requirements-lock.txt): pytest + hypothesis for unittest/property tests.
           python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
 
       - name: Run unittest suite
diff --git a/.gitignore b/.gitignore
index 685a7ae..5fd078f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,4 @@ Thumbs.db
 .coverage
 htmlcov/
 coverage.xml
+.hypothesis/
diff --git a/README.md b/README.md
index 4ca8e78..007802e 100644
--- a/README.md
+++ b/README.md
@@ -61,6 +61,12 @@ source venv/bin/activate
 pip install -r requirements.txt
 ```
 
+For development (pytest, mypy, Hypothesis property tests):
+
+```bash
+pip install -e ".[dev]"
+```
+
 For reproducible installs (same versions as CI), use the pinned lock file:
 
 ```bash
diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
index 8df056c..ba4af09 100644
--- a/tests/test_blob_parsing_fuzz.py
+++ b/tests/test_blob_parsing_fuzz.py
@@ -9,7 +9,9 @@
 
 import json
 import os
+import sqlite3
 import sys
+import tempfile
 import unittest
 
 from hypothesis import given, settings
@@ -20,7 +22,12 @@
     sys.path.insert(0, REPO_ROOT)
 
 from models import Bubble, SchemaError
-from utils.cli_chat_reader import _extract_blob_refs, messages_to_bubbles
+from utils.cli_chat_reader import (
+    classify_blob_data,
+    messages_to_bubbles,
+    traverse_blobs,
+    _extract_blob_refs,
+)
 from utils.text_extract import extract_text_from_bubble
 
 # Bounded strategies: fast enough for CI (<30s total with default example counts).
@@ -39,14 +46,38 @@
     max_size=12,
 )
 
+_BUBBLE_RAW_ANY = st.one_of(
+    _BUBBLE_RAW,
+    st.none(),
+    st.integers(),
+    st.lists(st.text(max_size=40), max_size=5),
+    st.text(max_size=200),
+)
+
 _BUBBLE_ID = st.text(
     alphabet=st.characters(blacklist_categories=("Cs",), blacklist_characters="\x00"),
     min_size=1,
     max_size=80,
 )
 
+_BUBBLE_ID_ANY = st.one_of(
+    _BUBBLE_ID,
+    st.just(""),
+    st.none(),
+    st.integers(min_value=0, max_value=9999),
+    st.binary(min_size=0, max_size=8),
+)
+
+_BLOB_ID_HEX = st.text(
+    alphabet="abcdef0123456789",
+    min_size=64,
+    max_size=64,
+)
+
+
 @st.composite
 def _cli_message(draw) -> dict:
+    # Empty role is intentional adversarial input (unknown / missing role).
     role = draw(st.sampled_from(["user", "assistant", "system", "tool", ""]))
     content = draw(
         st.one_of(
@@ -66,6 +97,7 @@ def _cli_message(draw) -> dict:
     )
     return {"role": role, "content": content}
 
+
 _BUBBLE_LIKE = st.dictionaries(
     st.sampled_from(["text", "richText", "codeBlocks", "type", "metadata"]),
     st.one_of(
@@ -84,42 +116,101 @@ def _cli_message(draw) -> dict:
     max_size=6,
 )
 
+_KV_VALUE = st.one_of(
+    st.none(),
+    _BUBBLE_RAW,
+    st.text(max_size=400),
+    st.binary(max_size=256),
+    st.integers(),
+)
+
+
+def _make_meta_value(meta: dict) -> str:
+    return json.dumps(meta).encode("utf-8").hex()
+
 
-def _classify_blob_bytes(data: bytes) -> None:
-    """Mirror traverse_blobs blob classification without SQLite."""
+def _build_store_db_raw(path: str, meta: dict, blobs: dict[str, bytes]) -> None:
+    """Minimal store.db with arbitrary blob payloads (for traverse_blobs fuzz)."""
+    conn = sqlite3.connect(path)
+    conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT)")
+    conn.execute("CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB)")
+    conn.execute("INSERT INTO meta VALUES ('0', ?)", (_make_meta_value(meta),))
+    for blob_id, data in blobs.items():
+        conn.execute("INSERT INTO blobs VALUES (?, ?)", (blob_id, data))
+    conn.commit()
+    conn.close()
+
+
+def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
+    """Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict)."""
     try:
-        msg = json.loads(data.decode("utf-8"))
-        if isinstance(msg, dict) and "role" in msg:
-            return
-    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
-        pass
-    _extract_blob_refs(data)
+        if value is None:
+            return None
+        if isinstance(value, (bytes, bytearray)):
+            parsed = json.loads(bytes(value).decode("utf-8"))
+        elif isinstance(value, str):
+            parsed = json.loads(value)
+        else:
+            parsed = value
+    except (json.JSONDecodeError, TypeError, ValueError, UnicodeDecodeError):
+        return None
+    try:
+        if not isinstance(bubble_id, str):
+            Bubble.from_dict(parsed, bubble_id=bubble_id)  # type: ignore[arg-type]
+            return None
+        return Bubble.from_dict(parsed, bubble_id=bubble_id).raw
+    except SchemaError:
+        return None
+
+
+def _parse_bubble_from_dict(raw: object, bubble_id: object) -> Bubble | None:
+    """Call Bubble.from_dict; return None on SchemaError, propagate nothing else."""
+    try:
+        return Bubble.from_dict(raw, bubble_id=bubble_id)  # type: ignore[arg-type]
+    except SchemaError:
+        return None
 
 
 class TestBubbleFromDictFuzz(unittest.TestCase):
     @given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
     @settings(max_examples=80, deadline=None)
     def test_never_raises_unhandled(self, raw: dict, bubble_id: str) -> None:
-        try:
-            bubble = Bubble.from_dict(raw, bubble_id=bubble_id)
-        except SchemaError:
+        bubble = _parse_bubble_from_dict(raw, bubble_id)
+        if bubble is None:
             return
-        except Exception as exc:
-            self.fail(f"unexpected {type(exc).__name__}: {exc}")
         self.assertEqual(bubble.bubble_id, bubble_id)
         self.assertIs(bubble.raw, raw)
 
+    @given(raw=_BUBBLE_RAW_ANY, bubble_id=_BUBBLE_ID_ANY)
+    @settings(max_examples=80, deadline=None)
+    def test_adversarial_inputs_only_schema_error_or_success(
+        self, raw: object, bubble_id: object
+    ) -> None:
+        try:
+            _parse_bubble_from_dict(raw, bubble_id)
+        except Exception as exc:
+            self.fail(f"unexpected {type(exc).__name__}: {exc}")
+
     @given(raw=_BUBBLE_RAW, bubble_id=_BUBBLE_ID)
     @settings(max_examples=80, deadline=None)
     def test_parsing_is_idempotent(self, raw: dict, bubble_id: str) -> None:
+        first = _parse_bubble_from_dict(raw, bubble_id)
+        second = _parse_bubble_from_dict(raw, bubble_id)
+        self.assertEqual(first, second)
+
+
+class TestWorkspaceTabsAssemblyFuzz(unittest.TestCase):
+    @given(bubble_id=_BUBBLE_ID_ANY, value=_KV_VALUE)
+    @settings(max_examples=100, deadline=None)
+    def test_assemble_workspace_bubble_never_raises(
+        self, bubble_id: object, value: object
+    ) -> None:
         try:
-            first = Bubble.from_dict(raw, bubble_id=bubble_id)
-            second = Bubble.from_dict(raw, bubble_id=bubble_id)
-        except SchemaError:
-            return
+            result = _assemble_workspace_bubble(bubble_id, value)
         except Exception as exc:
             self.fail(f"unexpected {type(exc).__name__}: {exc}")
-        self.assertEqual(first, second)
+        if result is not None:
+            self.assertIsInstance(result, dict)
 
 
 class TestBlobChainParsingFuzz(unittest.TestCase):
@@ -142,11 +233,39 @@ def test_extract_blob_refs_is_idempotent(self, data: bytes) -> None:
 
     @given(data=st.binary(max_size=4096))
     @settings(max_examples=80, deadline=None)
-    def test_blob_classification_never_raises(self, data: bytes) -> None:
+    def test_classify_blob_data_never_raises(self, data: bytes) -> None:
         try:
-            _classify_blob_bytes(data)
+            msg, refs = classify_blob_data(data)
         except Exception as exc:
             self.fail(f"unexpected {type(exc).__name__}: {exc}")
+        if msg is not None:
+            self.assertIsInstance(msg, dict)
+            self.assertEqual(refs, [])
+        else:
+            self.assertIsInstance(refs, list)
+
+    @given(
+        root_id=_BLOB_ID_HEX,
+        extra_ids=st.lists(_BLOB_ID_HEX, max_size=6, unique=True),
+        payloads=st.lists(st.binary(max_size=1024), min_size=1, max_size=8),
+    )
+    @settings(max_examples=40, deadline=None)
+    def test_traverse_blobs_never_raises(
+        self, root_id: str, extra_ids: list[str], payloads: list[bytes]
+    ) -> None:
+        meta = {"latestRootBlobId": root_id, "createdAt": 1_700_000_000_000}
+        blobs: dict[str, bytes] = {root_id: payloads[0]}
+        for i, bid in enumerate(extra_ids):
+            if bid not in blobs:
+                blobs[bid] = payloads[(i + 1) % len(payloads)]
+        with tempfile.TemporaryDirectory() as td:
+            db_path = os.path.join(td, "store.db")
+            _build_store_db_raw(db_path, meta, blobs)
+            try:
+                messages = traverse_blobs(db_path)
+            except Exception as exc:
+                self.fail(f"traverse_blobs raised {type(exc).__name__}: {exc}")
+            self.assertIsInstance(messages, list)
 
 
 class TestTextExtractionFuzz(unittest.TestCase):
@@ -154,9 +273,10 @@ class TestTextExtractionFuzz(unittest.TestCase):
     @settings(max_examples=100, deadline=None)
     def test_extract_text_from_bubble_never_raises(self, bubble: dict) -> None:
         try:
-            extract_text_from_bubble(bubble)
+            text = extract_text_from_bubble(bubble)
         except Exception as exc:
             self.fail(f"unexpected {type(exc).__name__}: {exc}")
+        self.assertIsInstance(text, str)
 
     @given(bubble=_BUBBLE_LIKE)
     @settings(max_examples=80, deadline=None)
@@ -181,9 +301,23 @@ def test_messages_to_bubbles_then_extract_never_raises(
         self.assertIsInstance(bubbles, list)
         for bubble in bubbles:
             try:
-                extract_text_from_bubble(bubble)
+                text = extract_text_from_bubble(bubble)
             except Exception as exc:
                 self.fail(f"extract_text_from_bubble raised {type(exc).__name__}: {exc}")
+            self.assertIsInstance(text, str)
+
+    @given(
+        messages=st.lists(_cli_message(), max_size=12),
+        created_at=st.integers(min_value=0, max_value=2_000_000_000_000),
+    )
+    @settings(max_examples=80, deadline=None)
+    def test_messages_to_bubbles_is_idempotent(
+        self, messages: list[dict], created_at: int
+    ) -> None:
+        self.assertEqual(
+            messages_to_bubbles(messages, created_at),
+            messages_to_bubbles(messages, created_at),
+        )
 
 
 if __name__ == "__main__":
diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py
index 14dbd0c..f0f1a4f 100644
--- a/utils/cli_chat_reader.py
+++ b/utils/cli_chat_reader.py
@@ -79,6 +79,23 @@ def _extract_blob_refs(data: bytes) -> list[str]:
     return refs
 
 
+def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]:
+    """Classify a blob payload as a JSON message or a binary chain node.
+
+    Returns ``(message_dict, [])`` when *data* decodes to a dict with a
+    ``role`` field; otherwise ``(None, refs)`` where *refs* are SHA-256 hex
+    ids from :func:`_extract_blob_refs`.  Used by :func:`traverse_blobs` and
+    property tests — keep in sync when the load loop changes.
+    """
+    try:
+        msg = json.loads(data.decode("utf-8"))
+        if isinstance(msg, dict) and "role" in msg:
+            return msg, []
+    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
+        pass
+    return None, _extract_blob_refs(data)
+
+
 def traverse_blobs(db_path: str) -> list[dict]:
     """Reconstruct the conversation from a ``store.db`` blob graph.
 
@@ -118,15 +135,11 @@ def traverse_blobs(db_path: str) -> list[dict]:
         for blob_id, data in conn.execute("SELECT id, data FROM blobs"):
             if not isinstance(data, bytes):
                 continue
-            try:
-                msg = json.loads(data.decode("utf-8"))
-                if isinstance(msg, dict) and "role" in msg:
-                    json_blobs[blob_id] = msg
-                    continue
-            except (UnicodeDecodeError, json.JSONDecodeError):
-                pass
-            refs = _extract_blob_refs(data)
-            chain_blobs[blob_id] = refs
+            msg, refs = classify_blob_data(data)
+            if msg is not None:
+                json_blobs[blob_id] = msg
+            else:
+                chain_blobs[blob_id] = refs
 
     # BFS from root (newest-first by nature of the linked-list structure);
     # reverse at the end to restore chronological (oldest→newest) order.
diff --git a/utils/text_extract.py b/utils/text_extract.py
index d0b179c..644ec10 100644
--- a/utils/text_extract.py
+++ b/utils/text_extract.py
@@ -28,9 +28,9 @@ def extract_text_from_bubble(bubble: dict) -> str:
 
     text = ""
 
-    # Try text field first
+    # Try text field first (coerce non-str values — Cursor payloads can drift)
     if bubble.get("text") and str(bubble["text"]).strip():
-        text = bubble["text"]
+        text = str(bubble["text"])
 
     # Fall back to richText
     if not text and bubble.get("richText"):
@@ -49,7 +49,7 @@ def extract_text_from_bubble(bubble: dict) -> str:
                 lang = cb.get("language", "")
                 text += f"\n\n```{lang}\n{cb['content']}\n```"
 
-    return text
+    return text if isinstance(text, str) else ""
 
 
 def slug(s: str) -> str:

From 8d0f2976f8471864c9ee61de560f77264ada39aa Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 10:41:20 -0400
Subject: [PATCH 3/9] =?UTF-8?q?fix:=20Remove=20dead=20return=20None=20in?=
 =?UTF-8?q?=20=5Fassemble=5Fworkspace=5Fbubble=20when=20bubble=5Fid=20isn?=
 =?UTF-8?q?=E2=80=99t=20a=20str.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_blob_parsing_fuzz.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
index ba4af09..6852d15 100644
--- a/tests/test_blob_parsing_fuzz.py
+++ b/tests/test_blob_parsing_fuzz.py
@@ -155,10 +155,7 @@ def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
     except (json.JSONDecodeError, TypeError, ValueError, UnicodeDecodeError):
         return None
     try:
-        if not isinstance(bubble_id, str):
-            Bubble.from_dict(parsed, bubble_id=bubble_id)  # type: ignore[arg-type]
-            return None
-        return Bubble.from_dict(parsed, bubble_id=bubble_id).raw
+        return Bubble.from_dict(parsed, bubble_id=bubble_id).raw  # type: ignore[arg-type]
     except SchemaError:
         return None
 

From 24a144bc90f98d75f6afeed86a7dbfc03ec2cd2d Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 10:52:59 -0400
Subject: [PATCH 4/9] fix: resolve findings from coderabbitai's review report

---
 tests/test_blob_parsing_fuzz.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
index 6852d15..1ab780c 100644
--- a/tests/test_blob_parsing_fuzz.py
+++ b/tests/test_blob_parsing_fuzz.py
@@ -14,7 +14,7 @@
 import tempfile
 import unittest
 
-from hypothesis import given, settings
+from hypothesis import HealthCheck, given, settings
 from hypothesis import strategies as st
 
 REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -26,7 +26,7 @@
     classify_blob_data,
     messages_to_bubbles,
     traverse_blobs,
-    _extract_blob_refs,
+    _extract_blob_refs,  # internal helper; covered directly alongside classify_blob_data
 )
 from utils.text_extract import extract_text_from_bubble
 
@@ -76,7 +76,7 @@
 
 
 @st.composite
-def _cli_message(draw) -> dict:
+def _cli_message(draw):
     # Empty role is intentional adversarial input (unknown / missing role).
     role = draw(st.sampled_from(["user", "assistant", "system", "tool", ""]))
     content = draw(
@@ -142,7 +142,12 @@ def _build_store_db_raw(path: str, meta: dict, blobs: dict[str, bytes]) -> None:
 
 
 def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
-    """Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict)."""
+    """Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict).
+
+    Intentionally re-implements the conversion instead of importing
+    ``_loads_kv_value_logged`` (logging / payload hashing side effects).
+    Keep in sync with the bubbleId load loop in ``services/workspace_tabs.py``.
+    """
     try:
         if value is None:
             return None
@@ -246,10 +251,15 @@ def test_classify_blob_data_never_raises(self, data: bytes) -> None:
         extra_ids=st.lists(_BLOB_ID_HEX, max_size=6, unique=True),
         payloads=st.lists(st.binary(max_size=1024), min_size=1, max_size=8),
     )
-    @settings(max_examples=40, deadline=None)
+    @settings(
+        max_examples=40,
+        deadline=None,
+        suppress_health_check=[HealthCheck.too_slow],
+    )
     def test_traverse_blobs_never_raises(
         self, root_id: str, extra_ids: list[str], payloads: list[bytes]
     ) -> None:
+        # CliSessionMeta only requires latestRootBlobId (str); BFS runs after meta parse.
         meta = {"latestRootBlobId": root_id, "createdAt": 1_700_000_000_000}
         blobs: dict[str, bytes] = {root_id: payloads[0]}
         for i, bid in enumerate(extra_ids):

From b2352c8150b6ff04d022a2b71e4143ecafdfc112 Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 12:22:34 -0400
Subject: [PATCH 5/9] fix: _assemble_workspace_bubble doesn't truly mirror
 production.

---
 tests/test_blob_parsing_fuzz.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
index 1ab780c..87261f3 100644
--- a/tests/test_blob_parsing_fuzz.py
+++ b/tests/test_blob_parsing_fuzz.py
@@ -144,20 +144,17 @@ def _build_store_db_raw(path: str, meta: dict, blobs: dict[str, bytes]) -> None:
 def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
     """Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict).
 
-    Intentionally re-implements the conversion instead of importing
-    ``_loads_kv_value_logged`` (logging / payload hashing side effects).
-    Keep in sync with the bubbleId load loop in ``services/workspace_tabs.py``.
+    Matches ``services/workspace_tabs.py`` (bubbleId loop): ``json.loads(row["value"])``
+    with no type branching — same exceptions as production. Rows with ``value IS NULL``
+    are not selected in production; ``None`` here returns ``None`` for fuzz only.
+
+    Intentionally omits ``_loads_kv_value_logged`` (logging / payload hashing).
     """
+    if value is None:
+        return None
     try:
-        if value is None:
-            return None
-        if isinstance(value, (bytes, bytearray)):
-            parsed = json.loads(bytes(value).decode("utf-8"))
-        elif isinstance(value, str):
-            parsed = json.loads(value)
-        else:
-            parsed = value
-    except (json.JSONDecodeError, TypeError, ValueError, UnicodeDecodeError):
+        parsed = json.loads(value)  # type: ignore[arg-type]
+    except (json.JSONDecodeError, TypeError, ValueError):
         return None
     try:
         return Bubble.from_dict(parsed, bubble_id=bubble_id).raw  # type: ignore[arg-type]

From 49734fe39b0a26ce68ee7b2a58bfdb4da839766b Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 12:26:27 -0400
Subject: [PATCH 6/9] fix: classify_blob_data widens the except list
 unnecessarily

---
 utils/cli_chat_reader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py
index f0f1a4f..1864af7 100644
--- a/utils/cli_chat_reader.py
+++ b/utils/cli_chat_reader.py
@@ -91,7 +91,7 @@ def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]:
         msg = json.loads(data.decode("utf-8"))
         if isinstance(msg, dict) and "role" in msg:
             return msg, []
-    except (UnicodeDecodeError, json.JSONDecodeError, TypeError):
+    except (UnicodeDecodeError, json.JSONDecodeError):
         pass
     return None, _extract_blob_refs(data)
 

From 62902133f247dcad7b7ac95055816f2239b76325 Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 12:31:24 -0400
Subject: [PATCH 7/9] fix: _BUBBLE_LIKE doesn't directly cover the bug
 text_extract.py fixes

---
 tests/test_blob_parsing_fuzz.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
index 87261f3..d148d6b 100644
--- a/tests/test_blob_parsing_fuzz.py
+++ b/tests/test_blob_parsing_fuzz.py
@@ -102,6 +102,8 @@ def _cli_message(draw):
     st.sampled_from(["text", "richText", "codeBlocks", "type", "metadata"]),
     st.one_of(
         st.text(max_size=300),
+        st.integers(),
+        st.booleans(),
         st.none(),
         st.lists(
             st.dictionaries(

From 0289d3e7252e23fb46cb0a022260e45b0c4cdc8f Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 12:39:12 -0400
Subject: [PATCH 8/9] fix: test_traverse_blobs_never_raises skips the
 meta-parse error path

---
 tests/test_blob_parsing_fuzz.py | 49 +++++++++++++++++++++++++++++----
 tests/test_cli_chat_reader.py   | 14 +++++-----
 tests/test_models.py            |  8 +++---
 utils/cli_chat_reader.py        |  6 ++--
 4 files changed, 58 insertions(+), 19 deletions(-)

diff --git a/tests/test_blob_parsing_fuzz.py b/tests/test_blob_parsing_fuzz.py
index d148d6b..36fd753 100644
--- a/tests/test_blob_parsing_fuzz.py
+++ b/tests/test_blob_parsing_fuzz.py
@@ -24,9 +24,9 @@
 from models import Bubble, SchemaError
 from utils.cli_chat_reader import (
     classify_blob_data,
+    extract_blob_refs,
     messages_to_bubbles,
     traverse_blobs,
-    _extract_blob_refs,  # internal helper; covered directly alongside classify_blob_data
 )
 from utils.text_extract import extract_text_from_bubble
 
@@ -132,17 +132,39 @@ def _make_meta_value(meta: dict) -> str:
 
 
 def _build_store_db_raw(path: str, meta: dict, blobs: dict[str, bytes]) -> None:
-    """Minimal store.db with arbitrary blob payloads (for traverse_blobs fuzz)."""
+    """Minimal store.db with well-formed meta dict and arbitrary blob payloads."""
+    _build_store_db_meta_row(path, _make_meta_value(meta), blobs)
+
+
+def _build_store_db_meta_row(
+    path: str, meta_row: str | None, blobs: dict[str, bytes]
+) -> None:
+    """Minimal store.db; *meta_row* is the raw ``meta.value`` (hex JSON or adversarial)."""
     conn = sqlite3.connect(path)
     conn.execute("CREATE TABLE meta (key TEXT PRIMARY KEY, value TEXT)")
     conn.execute("CREATE TABLE blobs (id TEXT PRIMARY KEY, data BLOB)")
-    conn.execute("INSERT INTO meta VALUES ('0', ?)", (_make_meta_value(meta),))
+    if meta_row is not None:
+        conn.execute("INSERT INTO meta VALUES ('0', ?)", (meta_row,))
     for blob_id, data in blobs.items():
         conn.execute("INSERT INTO blobs VALUES (?, ?)", (blob_id, data))
     conn.commit()
     conn.close()
 
 
+_FUZZ_META_ROW = st.one_of(
+    st.none(),
+    st.just(""),
+    st.text(min_size=0, max_size=200),
+    st.dictionaries(st.text(max_size=20), _JSON_VALUES, max_size=6).map(
+        lambda d: json.dumps(d).encode("utf-8").hex()
+    ),
+    st.builds(
+        lambda root: _make_meta_value({"latestRootBlobId": root, "createdAt": 1}),
+        _BLOB_ID_HEX,
+    ),
+)
+
+
 def _assemble_workspace_bubble(bubble_id: object, value: object) -> dict | None:
     """Mirror workspace_tabs KV bubble load (json.loads → Bubble.from_dict).
 
@@ -219,7 +241,7 @@ class TestBlobChainParsingFuzz(unittest.TestCase):
     @settings(max_examples=120, deadline=None)
     def test_extract_blob_refs_never_raises(self, data: bytes) -> None:
         try:
-            refs = _extract_blob_refs(data)
+            refs = extract_blob_refs(data)
         except Exception as exc:
             self.fail(f"unexpected {type(exc).__name__}: {exc}")
         self.assertIsInstance(refs, list)
@@ -230,7 +252,7 @@ def test_extract_blob_refs_never_raises(self, data: bytes) -> None:
     @given(data=st.binary(max_size=4096))
     @settings(max_examples=80, deadline=None)
     def test_extract_blob_refs_is_idempotent(self, data: bytes) -> None:
-        self.assertEqual(_extract_blob_refs(data), _extract_blob_refs(data))
+        self.assertEqual(extract_blob_refs(data), extract_blob_refs(data))
 
     @given(data=st.binary(max_size=4096))
     @settings(max_examples=80, deadline=None)
@@ -273,6 +295,23 @@ def test_traverse_blobs_never_raises(
                 self.fail(f"traverse_blobs raised {type(exc).__name__}: {exc}")
             self.assertIsInstance(messages, list)
 
+    @given(meta_row=_FUZZ_META_ROW)
+    @settings(
+        max_examples=30,
+        deadline=None,
+        suppress_health_check=[HealthCheck.too_slow],
+    )
+    def test_traverse_blobs_meta_parse_never_raises(self, meta_row: str | None) -> None:
+        """Covers meta decode / CliSessionMeta.from_dict failure → return [] (no crash)."""
+        with tempfile.TemporaryDirectory() as td:
+            db_path = os.path.join(td, "store.db")
+            _build_store_db_meta_row(db_path, meta_row, {})
+            try:
+                messages = traverse_blobs(db_path)
+            except Exception as exc:
+                self.fail(f"traverse_blobs raised {type(exc).__name__}: {exc}")
+            self.assertIsInstance(messages, list)
+
 
 class TestTextExtractionFuzz(unittest.TestCase):
     @given(bubble=_BUBBLE_LIKE)
diff --git a/tests/test_cli_chat_reader.py b/tests/test_cli_chat_reader.py
index afc182c..ce07d42 100644
--- a/tests/test_cli_chat_reader.py
+++ b/tests/test_cli_chat_reader.py
@@ -20,7 +20,7 @@
 
 from utils.cli_chat_reader import (
     _content_to_text,
-    _extract_blob_refs,
+    extract_blob_refs,
     _extract_tool_calls,
     _strip_user_info,
     aggregate_session_stats,
@@ -75,34 +75,34 @@ def _build_store_db(path: str, meta: dict, json_blobs: dict[str, dict], chain: d
 
 
 # ---------------------------------------------------------------------------
-# _extract_blob_refs
+# extract_blob_refs
 # ---------------------------------------------------------------------------
 
 class TestExtractBlobRefs(unittest.TestCase):
     def test_empty_bytes_returns_empty(self):
-        self.assertEqual(_extract_blob_refs(b""), [])
+        self.assertEqual(extract_blob_refs(b""), [])
 
     def test_single_ref(self):
         ref = "a" * 64  # 32 bytes as hex
         raw = b"\x0a\x20" + bytes.fromhex(ref)
-        self.assertEqual(_extract_blob_refs(raw), [ref])
+        self.assertEqual(extract_blob_refs(raw), [ref])
 
     def test_two_refs(self):
         ref1 = "a" * 64
         ref2 = "b" * 64
         raw = b"\x0a\x20" + bytes.fromhex(ref1) + b"\x0a\x20" + bytes.fromhex(ref2)
-        self.assertEqual(_extract_blob_refs(raw), [ref1, ref2])
+        self.assertEqual(extract_blob_refs(raw), [ref1, ref2])
 
     def test_noise_bytes_ignored(self):
         ref = "c" * 64
         noise = b"\x00\xff\x01\x02\x03\x04"
         raw = noise + b"\x0a\x20" + bytes.fromhex(ref) + b"\xde\xad"
-        self.assertIn(ref, _extract_blob_refs(raw))
+        self.assertIn(ref, extract_blob_refs(raw))
 
     def test_partial_tag_at_end_ignored(self):
         # Only 0x0a without 0x20 immediately following should not produce a ref.
         raw = b"\x0a" + b"\x00" * 32
-        self.assertEqual(_extract_blob_refs(raw), [])
+        self.assertEqual(extract_blob_refs(raw), [])
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_models.py b/tests/test_models.py
index a15a68e..04a8b84 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -17,7 +17,7 @@
     Workspace,
     WorkspaceLocalComposer,
 )
-from utils.cli_chat_reader import _extract_blob_refs
+from utils.cli_chat_reader import extract_blob_refs
 
 
 GOOD_COMPOSER_RAW: dict = {
@@ -252,7 +252,7 @@ def test_meta_parses_then_blob_chain_extracts_refs(self) -> None:
         self.assertEqual(meta.latest_root_blob_id, ref1)
 
         chain_blob = _make_blob_chain(ref1, ref2, ref3)
-        refs = _extract_blob_refs(chain_blob)
+        refs = extract_blob_refs(chain_blob)
         self.assertEqual(refs, [ref1, ref2, ref3])
 
     def test_blob_chain_skips_non_marker_bytes(self) -> None:
@@ -261,10 +261,10 @@ def test_blob_chain_skips_non_marker_bytes(self) -> None:
         garbage_after = b"\xff\xfe"
         raw = garbage_before + bytes([0x0A, 0x20]) + bytes.fromhex(ref) + garbage_after
 
-        self.assertEqual(_extract_blob_refs(raw), [ref])
+        self.assertEqual(extract_blob_refs(raw), [ref])
 
     def test_blob_chain_empty_returns_empty_list(self) -> None:
-        self.assertEqual(_extract_blob_refs(b""), [])
+        self.assertEqual(extract_blob_refs(b""), [])
 
 
 if __name__ == "__main__":
diff --git a/utils/cli_chat_reader.py b/utils/cli_chat_reader.py
index 1864af7..5c744c4 100644
--- a/utils/cli_chat_reader.py
+++ b/utils/cli_chat_reader.py
@@ -62,7 +62,7 @@ def _read_meta(db_path: str) -> dict:
     return {}
 
 
-def _extract_blob_refs(data: bytes) -> list[str]:
+def extract_blob_refs(data: bytes) -> list[str]:
     """Extract all 32-byte (SHA-256) blob references from a binary chain node.
 
     The encoding is: tag ``0x0a`` (field 1, length-delimited) followed by
@@ -84,7 +84,7 @@ def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]:
 
     Returns ``(message_dict, [])`` when *data* decodes to a dict with a
     ``role`` field; otherwise ``(None, refs)`` where *refs* are SHA-256 hex
-    ids from :func:`_extract_blob_refs`.  Used by :func:`traverse_blobs` and
+    ids from :func:`extract_blob_refs`.  Used by :func:`traverse_blobs` and
     property tests — keep in sync when the load loop changes.
     """
     try:
@@ -93,7 +93,7 @@ def classify_blob_data(data: bytes) -> tuple[dict | None, list[str]]:
             return msg, []
     except (UnicodeDecodeError, json.JSONDecodeError):
         pass
-    return None, _extract_blob_refs(data)
+    return None, extract_blob_refs(data)
 
 
 def traverse_blobs(db_path: str) -> list[dict]:

From ca3325922940f383457b10940ba3bfc4618e992c Mon Sep 17 00:00:00 2001
From: bradjin8 <headit74@hotmail.com>
Date: Wed, 27 May 2026 12:46:23 -0400
Subject: [PATCH 9/9] fix: comment slightly misleading

---
 .github/workflows/tests.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2342a44..924c3b7 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -98,14 +98,13 @@ jobs:
 
       - name: Install runtime + test dependencies
         # Install from the pinned lock file for deterministic dependency
-        # resolution (closes #47). pytest is added on top — it is not in
-        # requirements-lock.txt because it is a dev-only dep. pywebview is
+        # resolution (closes #47). pytest and hypothesis are installed on top;
+        # they are dev-only, so not in requirements-lock.txt. pywebview is
         # the desktop-launcher dep and pulls GTK / Qt system libraries on
         # Linux — intentionally excluded from the CI unittest matrix.
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements-lock.txt
-          # Dev-only (not in requirements-lock.txt): pytest + hypothesis for unittest/property tests.
           python -m pip install 'pytest>=8,<9' 'hypothesis>=6.100,<7'
 
       - name: Run unittest suite