Skip to content

Commit eeec613

Browse files
committed
refactor(cache): optimize SqliteDataCache with lazy deserialization and per-section tables
Replace single cache_entries table with per-section tables (libdoc, variables, resource, namespace) for better query performance. Introduce CacheEntry with lazy meta/data deserialization — data blobs are only unpickled on cache hit, avoiding expensive deserialization on meta mismatch. Move version checking from per-entry meta_version field to DB-level app_version parameter. On version mismatch all tables are dropped and recreated automatically. Simplify cache API from 3 methods (cache_data_exists, read_cache_data, save_cache_data) to 2 (read_entry, save_entry), reducing DB round-trips from 3 to 1 for reads and 2 to 1 for writes. Remove filepath_base property (adler32 hash-based keys) from all metadata classes — cache keys now use source paths or library names directly.
1 parent b050c32 commit eeec613

5 files changed

Lines changed: 318 additions & 356 deletions

File tree

packages/robot/src/robotcode/robot/diagnostics/data_cache.py

Lines changed: 102 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import pickle
22
import sqlite3
3-
from abc import ABC, abstractmethod
43
from enum import Enum
54
from pathlib import Path
6-
from typing import Any, Tuple, Type, TypeVar, Union, cast
5+
from typing import Any, Generic, Optional, Tuple, Type, TypeVar, Union, cast
76

8-
from robotcode.core.utils.dataclasses import as_json, from_json
9-
10-
_T = TypeVar("_T")
7+
_M = TypeVar("_M")
8+
_D = TypeVar("_D")
119

1210

1311
class CacheSection(Enum):
@@ -17,90 +15,64 @@ class CacheSection(Enum):
1715
NAMESPACE = "namespace"
1816

1917

20-
class DataCache(ABC):
21-
@abstractmethod
22-
def cache_data_exists(self, section: CacheSection, entry_name: str) -> bool: ...
23-
24-
@abstractmethod
25-
def read_cache_data(
26-
self, section: CacheSection, entry_name: str, types: Union[Type[_T], Tuple[Type[_T], ...]]
27-
) -> _T: ...
28-
29-
@abstractmethod
30-
def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) -> None: ...
31-
32-
def close(self) -> None:
33-
pass
34-
35-
36-
class FileCacheDataBase(DataCache, ABC):
37-
def __init__(self, cache_dir: Path) -> None:
38-
self.cache_dir = cache_dir
39-
40-
if not Path.exists(self.cache_dir):
41-
Path.mkdir(self.cache_dir, parents=True)
42-
Path(self.cache_dir / ".gitignore").write_text(
43-
"# Created by robotcode\n*\n",
44-
"utf-8",
45-
)
46-
47-
48-
class JsonDataCache(FileCacheDataBase):
49-
def build_cache_data_filename(self, section: CacheSection, entry_name: str) -> Path:
50-
return self.cache_dir / section.value / (entry_name + ".json")
51-
52-
def cache_data_exists(self, section: CacheSection, entry_name: str) -> bool:
53-
cache_file = self.build_cache_data_filename(section, entry_name)
54-
return cache_file.exists()
55-
56-
def read_cache_data(
57-
self, section: CacheSection, entry_name: str, types: Union[Type[_T], Tuple[Type[_T], ...]]
58-
) -> _T:
59-
cache_file = self.build_cache_data_filename(section, entry_name)
60-
return from_json(cache_file.read_text("utf-8"), types)
61-
62-
def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) -> None:
63-
cached_file = self.build_cache_data_filename(section, entry_name)
64-
65-
cached_file.parent.mkdir(parents=True, exist_ok=True)
66-
cached_file.write_text(as_json(data), "utf-8")
67-
68-
69-
class PickleDataCache(FileCacheDataBase):
70-
def build_cache_data_filename(self, section: CacheSection, entry_name: str) -> Path:
71-
return self.cache_dir / section.value / (entry_name + ".pkl")
72-
73-
def cache_data_exists(self, section: CacheSection, entry_name: str) -> bool:
74-
cache_file = self.build_cache_data_filename(section, entry_name)
75-
return cache_file.exists()
76-
77-
def read_cache_data(
78-
self, section: CacheSection, entry_name: str, types: Union[Type[_T], Tuple[Type[_T], ...]]
79-
) -> _T:
80-
cache_file = self.build_cache_data_filename(section, entry_name)
81-
82-
with cache_file.open("rb") as f:
83-
result = pickle.load(f)
84-
85-
if isinstance(result, types):
86-
return cast(_T, result)
87-
88-
raise TypeError(f"Expected {types} but got {type(result)}")
89-
90-
def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) -> None:
91-
cached_file = self.build_cache_data_filename(section, entry_name)
92-
93-
cached_file.parent.mkdir(parents=True, exist_ok=True)
94-
with cached_file.open("wb") as f:
95-
pickle.dump(data, f)
96-
97-
98-
class SqliteDataCache(DataCache):
99-
"""Cache backend using a single SQLite database with zlib-compressed pickle blobs."""
100-
101-
_SCHEMA_VERSION = 1
102-
103-
def __init__(self, cache_dir: Path) -> None:
18+
class CacheEntry(Generic[_M, _D]):
    """Lazy-deserializing cache entry.

    Holds the raw ``meta`` and ``data`` blobs exactly as read from the
    database and unpickles each one only on first property access.  This lets
    callers inspect ``meta`` (and reject the entry on a mismatch) without
    paying the cost of deserializing the usually much larger ``data`` blob.
    """

    def __init__(
        self,
        meta_blob: Optional[bytes],
        data_blob: bytes,
        meta_type: Union[Type[_M], Tuple[Type[_M], ...]],
        data_type: Union[Type[_D], Tuple[Type[_D], ...]],
    ) -> None:
        self._meta_blob = meta_blob
        self._data_blob = data_blob
        self._meta_type = meta_type
        self._data_type = data_type
        self._meta_cache: Optional[_M] = None
        self._data_cache: Optional[_D] = None
        # Separate "loaded" flags (instead of testing the cache for None) so a
        # stored value is never deserialized more than once.
        self._meta_loaded = False
        self._data_loaded = False

    @property
    def meta(self) -> Optional[_M]:
        """Deserialized meta object, or ``None`` if no meta blob was stored.

        Raises:
            TypeError: if the unpickled object is not an instance of
                ``meta_type``.
        """
        if not self._meta_loaded:
            if self._meta_blob is not None:
                # NOTE: pickle.loads must only ever see blobs this cache wrote
                # itself — never untrusted input.
                result = pickle.loads(self._meta_blob)
                if not isinstance(result, self._meta_type):
                    raise TypeError(f"Expected {self._meta_type} but got {type(result)}")
                self._meta_cache = cast(_M, result)
            self._meta_loaded = True
        return self._meta_cache

    @property
    def data(self) -> _D:
        """Deserialized data object, unpickled on first access only.

        Raises:
            TypeError: if the unpickled object is not an instance of
                ``data_type``.
        """
        if not self._data_loaded:
            result = pickle.loads(self._data_blob)
            if not isinstance(result, self._data_type):
                raise TypeError(f"Expected {self._data_type} but got {type(result)}")
            self._data_cache = cast(_D, result)
            self._data_loaded = True

        # _data_loaded guarantees _data_cache was set above; a cast is safer
        # than the previous `assert ... is not None`, which is stripped under
        # `python -O` and would spuriously fail for a legitimately-None value.
        return cast(_D, self._data_cache)
62+
63+
64+
_TABLE_NAMES = [s.value for s in CacheSection]
65+
66+
67+
class SqliteDataCache:
68+
"""Cache backend using a single SQLite database with per-section tables.
69+
70+
Each CacheSection gets its own table with entry_name as PK, plus meta and data
71+
BLOB columns. An app_version is stored in a metadata table; on version mismatch
72+
all tables are dropped and recreated.
73+
"""
74+
75+
def __init__(self, cache_dir: Path, app_version: str = "") -> None:
10476
self.cache_dir = cache_dir
10577

10678
if not cache_dir.exists():
@@ -116,46 +88,55 @@ def __init__(self, cache_dir: Path) -> None:
11688
self._conn.execute("PRAGMA synchronous=NORMAL")
11789
self._conn.execute("PRAGMA cache_size=-8000")
11890
self._conn.execute("PRAGMA mmap_size=67108864")
119-
self._conn.execute(
120-
"CREATE TABLE IF NOT EXISTS cache_entries ("
121-
" section TEXT NOT NULL,"
122-
" entry_name TEXT NOT NULL,"
123-
" data BLOB NOT NULL,"
124-
" PRIMARY KEY (section, entry_name)"
125-
")"
126-
)
127-
self._conn.commit()
12891

129-
def cache_data_exists(self, section: CacheSection, entry_name: str) -> bool:
130-
row = self._conn.execute(
131-
"SELECT 1 FROM cache_entries WHERE section = ? AND entry_name = ?",
132-
(section.value, entry_name),
133-
).fetchone()
134-
return row is not None
92+
self._ensure_schema(app_version)
13593

136-
def read_cache_data(
137-
self, section: CacheSection, entry_name: str, types: Union[Type[_T], Tuple[Type[_T], ...]]
138-
) -> _T:
139-
row = self._conn.execute(
140-
"SELECT data FROM cache_entries WHERE section = ? AND entry_name = ?",
141-
(section.value, entry_name),
142-
).fetchone()
94+
def _ensure_schema(self, app_version: str) -> None:
95+
self._conn.execute("CREATE TABLE IF NOT EXISTS _meta ( key TEXT PRIMARY KEY, value TEXT NOT NULL)")
14396

144-
if row is None:
145-
raise FileNotFoundError(f"No cache entry for {section.value}/{entry_name}")
97+
row = self._conn.execute("SELECT value FROM _meta WHERE key = 'app_version'").fetchone()
98+
stored_version = row[0] if row else None
14699

147-
result = pickle.loads(row[0])
100+
if stored_version != app_version:
101+
for table in _TABLE_NAMES:
102+
self._conn.execute(f"DROP TABLE IF EXISTS {table}")
103+
self._conn.execute("INSERT OR REPLACE INTO _meta (key, value) VALUES ('app_version', ?)", (app_version,))
148104

149-
if isinstance(result, types):
150-
return cast(_T, result)
105+
for table in _TABLE_NAMES:
106+
self._conn.execute(
107+
f"CREATE TABLE IF NOT EXISTS {table} ( entry_name TEXT PRIMARY KEY, meta BLOB, data BLOB NOT NULL)"
108+
)
109+
self._conn.commit()
151110

152-
raise TypeError(f"Expected {types} but got {type(result)}")
111+
def read_entry(
    self,
    section: CacheSection,
    entry_name: str,
    meta_type: Union[Type[_M], Tuple[Type[_M], ...]],
    data_type: Union[Type[_D], Tuple[Type[_D], ...]],
) -> Optional[CacheEntry[_M, _D]]:
    """Fetch one entry from *section*'s table in a single round-trip.

    Returns ``None`` on a cache miss.  On a hit the returned
    :class:`CacheEntry` still holds raw blobs — nothing is unpickled until
    its ``meta``/``data`` properties are accessed.
    """
    cursor = self._conn.execute(
        f"SELECT meta, data FROM {section.value} WHERE entry_name = ?",
        (entry_name,),
    )
    row = cursor.fetchone()
    if row is None:
        return None

    meta_blob, data_blob = row
    return CacheEntry(meta_blob, data_blob, meta_type, data_type)
127+
128+
def save_entry(
    self,
    section: CacheSection,
    entry_name: str,
    meta: Any,
    data: Any,
) -> None:
    """Pickle *meta* and *data* and upsert them into *section*'s table.

    A ``None`` meta is stored as a NULL blob; the write is committed
    immediately so a single call is one complete round-trip.
    """
    if meta is None:
        meta_blob = None
    else:
        meta_blob = pickle.dumps(meta, protocol=pickle.HIGHEST_PROTOCOL)
    data_blob = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

    sql = f"INSERT OR REPLACE INTO {section.value} (entry_name, meta, data) VALUES (?, ?, ?)"
    self._conn.execute(sql, (entry_name, meta_blob, data_blob))
    self._conn.commit()
161142

packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -482,37 +482,27 @@ def _try_load_cached_namespace(
482482
if not Path(source).exists():
483483
return None
484484

485-
# Compute filepath_base from source path
486-
temp_filepath_base = NamespaceMetaData(
487-
meta_version="",
488-
source=source,
489-
source_mtime_ns=0,
490-
config_fingerprint="",
491-
).filepath_base
492-
493-
meta_file = temp_filepath_base + ".meta"
494-
if not data_cache.cache_data_exists(CacheSection.NAMESPACE, meta_file):
495-
return None
496-
497485
try:
498-
saved_meta = data_cache.read_cache_data(CacheSection.NAMESPACE, meta_file, NamespaceMetaData)
486+
entry = data_cache.read_entry(CacheSection.NAMESPACE, source, NamespaceMetaData, NamespaceData)
499487
except (SystemExit, KeyboardInterrupt):
500488
raise
501489
except BaseException as e:
502490
ex = e
503491
self._logger.debug(
504-
lambda: f"Failed to read namespace meta for {source}: {ex}",
492+
lambda: f"Failed to read namespace cache for {source}: {ex}",
505493
context_name="import",
506494
)
507495
return None
508496

509-
if not imports_manager.validate_namespace_meta(saved_meta):
497+
if entry is None or entry.meta is None:
510498
return None
511499

512-
# Meta is valid — load the full NamespaceData
513-
data_file = temp_filepath_base + ".data"
500+
if not imports_manager.validate_namespace_meta(entry.meta):
501+
return None
502+
503+
# Meta is valid — load the full NamespaceData (lazy deserialization)
514504
try:
515-
namespace_data = data_cache.read_cache_data(CacheSection.NAMESPACE, data_file, NamespaceData)
505+
namespace_data = entry.data
516506
except (SystemExit, KeyboardInterrupt):
517507
raise
518508
except BaseException as e:
@@ -564,11 +554,7 @@ def _save_namespace_to_cache(
564554
data = namespace.to_data()
565555

566556
data_cache = imports_manager.data_cache
567-
data_file = meta.filepath_base + ".data"
568-
meta_file = meta.filepath_base + ".meta"
569-
570-
data_cache.save_cache_data(CacheSection.NAMESPACE, data_file, data)
571-
data_cache.save_cache_data(CacheSection.NAMESPACE, meta_file, meta)
557+
data_cache.save_entry(CacheSection.NAMESPACE, source, meta, data)
572558
except (SystemExit, KeyboardInterrupt):
573559
raise
574560
except BaseException as e:

0 commit comments

Comments
 (0)