Skip to content

Commit b471c5c

Browse files
committed
caches: support compression of cache files
profiles.pickle has become quite fat on disk, reaching 185MB in size. More information on the source of the issue can be found in the issue linked below. I've decided to use "zstd -T0" (with the default compression level) to compress the cache files. This should help with the size of the cache files, and the performance hit should be negligible. I've measured the time it takes to load the cache files before and after this change, and the difference is nil: the time is mostly the cost of pickle.load, and the compression/decompression is negligible in comparison. I'm still somewhat concerned about my usage of subprocess.Popen, but I think it's fine. Resolves: #735 Signed-off-by: Arthur Zamarin <arthurzam@gentoo.org>
1 parent c8c1d3a commit b471c5c

2 files changed

Lines changed: 25 additions & 7 deletions

File tree

src/pkgcheck/addons/caches.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pathlib
66
import pickle
77
import shutil
8+
import subprocess
89
from collections import UserDict
910
from dataclasses import dataclass
1011
from hashlib import blake2b
@@ -79,11 +80,21 @@ def cache_file(self, repo):
7980
dirname = f"{repo.repo_id.lstrip(os.sep)}-{token}"
8081
return pjoin(self.options.cache_dir, "repos", dirname, self.cache.file)
8182

82-
def load_cache(self, path, fallback=None):
83+
def load_cache(self, path: str, fallback=None):
8384
cache = fallback
8485
try:
85-
with open(path, "rb") as f:
86-
cache = pickle.load(f)
86+
if path.endswith(".zst"):
87+
if not os.path.exists(path):
88+
raise FileNotFoundError(path)
89+
with subprocess.Popen(("zstd", "-qdcf", path), stdout=subprocess.PIPE) as proc:
90+
if proc.poll():
91+
raise PkgcheckUserException(
92+
f"failed decompressing {self.cache.type} cache: {path!r}"
93+
)
94+
cache = pickle.load(proc.stdout)
95+
else:
96+
with open(path, "rb") as f:
97+
cache = pickle.load(f)
8798
if cache.version != self.cache.version:
8899
logger.debug("forcing %s cache regen due to outdated version", self.cache.type)
89100
os.remove(path)
@@ -98,11 +109,18 @@ def load_cache(self, path, fallback=None):
98109
cache = fallback
99110
return cache
100111

101-
def save_cache(self, data, path):
112+
def save_cache(self, data, path: str):
102113
try:
103114
os.makedirs(os.path.dirname(path), exist_ok=True)
104-
with AtomicWriteFile(path, binary=True) as f:
105-
pickle.dump(data, f, protocol=-1)
115+
if path.endswith(".zst"):
116+
with subprocess.Popen(("zstd", "-T0", "-fqo", path), stdin=subprocess.PIPE) as proc:
117+
pickle.dump(data, proc.stdin, protocol=-1)
118+
if os.path.exists(path[:-4]):
119+
logger.warning("removing old %s cache file", self.cache.type)
120+
os.remove(path[:-4])
121+
else:
122+
with AtomicWriteFile(path, binary=True) as f:
123+
pickle.dump(data, f, protocol=-1)
106124
except IOError as e:
107125
msg = f"failed dumping {self.cache.type} cache: {path!r}: {e.strerror}"
108126
raise PkgcheckUserException(msg)

src/pkgcheck/addons/profiles.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ class ProfileAddon(caches.CachedAddon):
119119
non_profile_dirs = frozenset(["desc", "updates"])
120120

121121
# cache registry
122-
cache = caches.CacheData(type="profiles", file="profiles.pickle", version=2)
122+
cache = caches.CacheData(type="profiles", file="profiles.pickle.zst", version=3)
123123

124124
@classmethod
125125
def mangle_argparser(cls, parser):

0 commit comments

Comments
 (0)