From beab9b27119a02dfb859246c88805ca19d3df827 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Sun, 31 May 2026 22:51:03 +0530 Subject: [PATCH 1/9] repoobj: add chunk_id to blob header, extend to 49 bytes, refs #8572 Stores chunk_id unencrypted in the per-blob header so borg check can rebuild the chunk_id -> pack location index without decryption. AEAD uses chunk_id as additional data, making key-free recovery circular without an explicit plaintext copy. Header layout: OBJ_MAGIC(8) + version(1) + chunk_id(32) + meta_size(4) + data_size(4) = REPOOBJ_HEADER_SIZE = 49 bytes. --- src/borg/repoobj.py | 11 +++++++---- src/borg/testsuite/repository_test.py | 8 ++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/borg/repoobj.py b/src/borg/repoobj.py index ad8e52d6f6..f2a57f95a8 100644 --- a/src/borg/repoobj.py +++ b/src/borg/repoobj.py @@ -13,11 +13,14 @@ OBJ_MAGIC = b"BORG_OBJ" OBJ_VERSION = 0x01 +# Fixed header size per blob: OBJ_MAGIC(8) + version(1) + chunk_id(32) + meta_size(4) + data_size(4) +REPOOBJ_HEADER_SIZE = 49 + class RepoObj: - # Object header: magic (8b), format version (1b), meta size (4b), data size (4b). - obj_header = Struct("<8sBII") - ObjHeader = namedtuple("ObjHeader", "magic version meta_size data_size") + # Object header: magic (8b), format version (1b), chunk_id (32b), meta size (4b), data size (4b). + obj_header = Struct("<8sB32sII") + ObjHeader = namedtuple("ObjHeader", "magic version chunk_id meta_size data_size") @classmethod def extract_crypted_data(cls, data: bytes) -> bytes: @@ -72,7 +75,7 @@ def format( data_encrypted = self.key.encrypt(id, data_compressed) meta_packed = msgpack.packb(meta) meta_encrypted = self.key.encrypt(id, meta_packed) - hdr = self.ObjHeader(OBJ_MAGIC, OBJ_VERSION, len(meta_encrypted), len(data_encrypted)) + hdr = self.ObjHeader(OBJ_MAGIC, OBJ_VERSION, id, len(meta_encrypted), len(data_encrypted)) hdr_packed = self.obj_header.pack(*hdr) return hdr_packed + meta_encrypted + data_encrypted diff --git a/src/borg/testsuite/repository_test.py b/src/borg/testsuite/repository_test.py index 5e15ec7942..becdb36354 100644 --- a/src/borg/testsuite/repository_test.py +++ b/src/borg/testsuite/repository_test.py @@ -53,9 +53,9 @@ def reopen(repository, exclusive: bool | None = True, create=False): ) -def fchunk(data, meta=b""): +def fchunk(data, meta=b"", chunk_id=b"\x00" * 32): # Format chunk: create a raw chunk that has a valid RepoObj layout, but does not use encryption or compression. - hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, len(meta), len(data)) + hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, chunk_id, len(meta), len(data)) assert isinstance(data, bytes) chunk = hdr + meta + data return chunk @@ -65,7 +65,7 @@ def pchunk(chunk): # Parse chunk: extract data and metadata from a raw chunk made by fchunk. hdr_size = RepoObj.obj_header.size hdr = chunk[:hdr_size] - meta_size, data_size = RepoObj.obj_header.unpack(hdr)[2:4] + meta_size, data_size = RepoObj.obj_header.unpack(hdr)[3:5] meta = chunk[hdr_size : hdr_size + meta_size] data = chunk[hdr_size + meta_size : hdr_size + meta_size + data_size] return data, meta @@ -97,7 +97,7 @@ def test_basic_operations(repo_fixtures, request): def test_read_data(repo_fixtures, request): with get_repository_from_fixture(repo_fixtures, request) as repository: meta, data = b"meta", b"data" - hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, len(meta), len(data)) + hdr = RepoObj.obj_header.pack(OBJ_MAGIC, OBJ_VERSION, H(0), len(meta), len(data)) chunk_complete = hdr + meta + data chunk_short = hdr + meta repository.put(H(0), chunk_complete) From 1cb8d99425028e0da4e2892fbbefca300b4153ea Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Sun, 31 May 2026 22:51:16 +0530 Subject: [PATCH 2/9] repository: introduce pack_id, move chunks to packs/ namespace, refs #8572 Introduces pack_id as the borgstore storage key (N=1: pack_id == chunk_id). Chunks move from data/ to packs/ with single-level directory sharding (256 subdirs). check_object() validates the header chunk_id against the pack filename. Adds packs/ to ns_config with levels=[1] and to the permissions maps for no-delete and write-only modes. --- src/borg/repository.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/borg/repository.py b/src/borg/repository.py index 925725bcbb..158aa88d3c 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -124,6 +124,7 @@ def __init__( "data/": {"levels": [data_levels]}, "keys/": {"levels": [0]}, "locks/": {"levels": [0]}, + "packs/": {"levels": [1]}, } # Get permissions from parameter or environment variable permissions = permissions if permissions is not None else os.environ.get("BORG_REPO_PERMISSIONS", "all") @@ -139,6 +140,7 @@ def __init__( "data": "lrw", "keys": "lr", "locks": "lrwD", # borg needs to create/delete a shared lock here + "packs": "lrw", } elif permissions == "write-only": # mostly no reading permissions = { @@ -149,6 +151,7 @@ def __init__( "data": "lw", # no r! "keys": "lr", "locks": "lrwD", # borg needs to create/delete a shared lock here + "packs": "lw", # no r! } elif permissions == "read-only": # mostly r/o permissions = {"": "lr", "locks": "lrwD"} @@ -334,6 +337,8 @@ def check_object(obj): log_error("invalid object magic.") elif hdr.version != OBJ_VERSION: log_error(f"unsupported object version: {hdr.version}.") + elif hdr.chunk_id != hex_to_bin(info.name): + log_error("chunk_id mismatch in header.") else: meta = obj[hdr_size : hdr_size + hdr.meta_size] if hdr.meta_size != len(meta): @@ -376,11 +381,11 @@ def check_object(obj): # As we don't do garbage collection here, this is not a problem. # We also don't know the plaintext size, so we set it to 0. init_entry = ChunkIndexEntry(flags=ChunkIndex.F_USED, size=0) - infos = self.store.list("data") + infos = self.store.list("packs") try: for info in infos: self._lock_refresh() - key = "data/%s" % info.name + key = "packs/%s" % info.name if key <= last_key_checked: # needs sorted keys continue try: @@ -412,8 +417,8 @@ def check_object(obj): # add all existing objects to the index. # borg check: the index may have corrupted objects (we did not delete them) # borg check --repair: the index will only have non-corrupted objects. - id = hex_to_bin(info.name) - chunks[id] = init_entry + pack_id = hex_to_bin(info.name) # N=1: pack_id == chunk_id + chunks[pack_id] = init_entry now = time.monotonic() if now > t_last_checkpoint + 300: # checkpoint every 5 mins t_last_checkpoint = now @@ -456,30 +461,31 @@ def list(self, limit=None, marker=None): """ collect = True if marker is None else False result = [] - infos = self.store.list("data") # generator yielding ItemInfos + infos = self.store.list("packs") # generator yielding ItemInfos while True: self._lock_refresh() try: info = next(infos) except StoreObjectNotFound: - break # can happen e.g. if "data" does not exist, pointless to continue in that case + break # can happen e.g. if "packs" does not exist, pointless to continue in that case except StopIteration: break else: - id = hex_to_bin(info.name) + pack_id = hex_to_bin(info.name) # N=1: pack_id == chunk_id if collect: - result.append((id, info.size)) + result.append((pack_id, info.size)) if len(result) == limit: break - elif id == marker: + elif pack_id == marker: collect = True # note: do not collect the marker id return result def get(self, id, read_data=True, raise_missing=True): self._lock_refresh() + pack_id = id # N=1: pack_id == chunk_id id_hex = bin_to_hex(id) - key = "data/" + id_hex + key = "packs/" + bin_to_hex(pack_id) try: if read_data: # read everything @@ -523,7 +529,8 @@ def put(self, id, data, wait=True): if data_size > MAX_DATA_SIZE: raise IntegrityError(f"More than allowed put data [{data_size} > {MAX_DATA_SIZE}]") - key = "data/" + bin_to_hex(id) + pack_id = id # N=1: pack_id == chunk_id + key = "packs/" + bin_to_hex(pack_id) self.store.store(key, data) def delete(self, id, wait=True): @@ -533,7 +540,8 @@ def delete(self, id, wait=True): deal with async results / exceptions later. """ self._lock_refresh() - key = "data/" + bin_to_hex(id) + pack_id = id # N=1: pack_id == chunk_id + key = "packs/" + bin_to_hex(pack_id) try: self.store.delete(key) except StoreObjectNotFound: From 05ce0a1897889e267dd776a0df5701e1b37344b7 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 00:47:14 +0530 Subject: [PATCH 3/9] repository: add BORGPACK pack header, bump repo version to 4, refs #8572 Wrap each pack file in a 13-byte header (magic + version + blob_len) so packs are self-identifying and the [len][blob] unit extends to N>1 without a format revision. Bump version 3->4: packs/ and 49-byte ObjHeader are incompatible with version-3 readers. Fix test_extra_chunks chunk_id mismatch. --- src/borg/repository.py | 80 +++++++++++++------ src/borg/testsuite/archiver/check_cmd_test.py | 5 +- 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/src/borg/repository.py b/src/borg/repository.py index 158aa88d3c..e7fb330f48 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -17,9 +17,16 @@ from .storelocking import Lock from .logger import create_logger from .manifest import NoManifestError +from struct import Struct + from .repoobj import RepoObj, OBJ_MAGIC, OBJ_VERSION from .crypto.key import is_keyfile +PACK_MAGIC = b"BORGPACK" +PACK_VERSION = 0x01 +_pack_header = Struct("<8sBI") # magic(8) + version(1) + blob_len(4) +PACK_HEADER_SIZE = _pack_header.size # 13 bytes + logger = create_logger(__name__) @@ -174,7 +181,7 @@ def __init__( self._send_log = send_log_cb or (lambda: None) self.do_create = create self.created = False - self.acceptable_repo_versions = (3,) + self.acceptable_repo_versions = (4,) self.opened = False self.lock = None self.do_lock = lock @@ -212,10 +219,10 @@ def create(self): self.store.open() try: self.store.store("config/readme", REPOSITORY_README.encode()) - self.version = 3 + self.version = 4 self.store.store("config/version", str(self.version).encode()) self.store.store("config/id", bin_to_hex(os.urandom(32)).encode()) - # we know repo/data/ still does not have any chunks stored in it, + # we know repo/packs/ still does not have any chunks stored in it, # but for some stores, there might be a lot of empty directories and # listing them all might be rather slow, so we better cache an empty # ChunkIndex from here so that the first repo operation does not have @@ -329,25 +336,38 @@ def log_error(msg): def check_object(obj): """Check if obj looks valid.""" + if len(obj) < PACK_HEADER_SIZE: + log_error("too small.") + return + magic, version, blob_len = _pack_header.unpack(obj[:PACK_HEADER_SIZE]) + if magic != PACK_MAGIC: + log_error("invalid pack magic.") + return + if version != PACK_VERSION: + log_error(f"unsupported pack version: {version}.") + return + blob = obj[PACK_HEADER_SIZE:] + if len(blob) != blob_len: + log_error(f"pack blob_len mismatch: header says {blob_len}, actual {len(blob)}.") + return hdr_size = RepoObj.obj_header.size - obj_size = len(obj) - if obj_size >= hdr_size: - hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(obj[:hdr_size])) - if hdr.magic != OBJ_MAGIC: - log_error("invalid object magic.") - elif hdr.version != OBJ_VERSION: - log_error(f"unsupported object version: {hdr.version}.") - elif hdr.chunk_id != hex_to_bin(info.name): - log_error("chunk_id mismatch in header.") - else: - meta = obj[hdr_size : hdr_size + hdr.meta_size] - if hdr.meta_size != len(meta): - log_error("metadata size mismatch.") - data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] - if hdr.data_size != len(data): - log_error("data size mismatch.") - else: + if len(blob) < hdr_size: log_error("too small.") + return + hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(blob[:hdr_size])) + if hdr.magic != OBJ_MAGIC: + log_error("invalid object magic.") + elif hdr.version != OBJ_VERSION: + log_error(f"unsupported object version: {hdr.version}.") + elif hdr.chunk_id != hex_to_bin(info.name): + log_error("chunk_id mismatch in header.") + else: + meta = blob[hdr_size : hdr_size + hdr.meta_size] + if hdr.meta_size != len(meta): + log_error("metadata size mismatch.") + data = blob[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] + if hdr.data_size != len(data): + log_error("data size mismatch.") # TODO: progress indicator, ... partial = bool(max_duration) @@ -488,14 +508,15 @@ def get(self, id, read_data=True, raise_missing=True): key = "packs/" + bin_to_hex(pack_id) try: if read_data: - # read everything - return self.store.load(key) + raw = self.store.load(key) + return raw[PACK_HEADER_SIZE:] else: # RepoObj layout supports separately encrypted metadata and data. # We return enough bytes so the client can decrypt the metadata. hdr_size = RepoObj.obj_header.size extra_size = 1024 - hdr_size # load a bit more, 1024b, reduces round trips - obj = self.store.load(key, size=hdr_size + extra_size) + raw = self.store.load(key, size=PACK_HEADER_SIZE + hdr_size + extra_size) + obj = raw[PACK_HEADER_SIZE:] hdr = obj[0:hdr_size] if len(hdr) != hdr_size: raise IntegrityError(f"Object too small [id {id_hex}]: expected {hdr_size}, got {len(hdr)} bytes") @@ -503,7 +524,8 @@ def get(self, id, read_data=True, raise_missing=True): if meta_size > extra_size: # we did not get enough, need to load more, but not all. # this should be rare, as chunk metadata is rather small usually. - obj = self.store.load(key, size=hdr_size + meta_size) + raw = self.store.load(key, size=PACK_HEADER_SIZE + hdr_size + meta_size) + obj = raw[PACK_HEADER_SIZE:] meta = obj[hdr_size : hdr_size + meta_size] if len(meta) != meta_size: raise IntegrityError(f"Object too small [id {id_hex}]: expected {meta_size}, got {len(meta)} bytes") @@ -531,13 +553,21 @@ def put(self, id, data, wait=True): pack_id = id # N=1: pack_id == chunk_id key = "packs/" + bin_to_hex(pack_id) - self.store.store(key, data) + pack_hdr = _pack_header.pack(PACK_MAGIC, PACK_VERSION, data_size) + self.store.store(key, pack_hdr + data) def delete(self, id, wait=True): """delete a repo object Note: when doing calls with wait=False this gets async and caller must deal with async results / exceptions later. + + N=1: pack_id == chunk_id, so deleting the pack file is equivalent to + deleting the chunk. Hard delete is safe here. + N>1: a pack contains multiple chunks. Individual chunks cannot be deleted + from a pack without rewriting it. This method must become a soft-delete + (no-op) before N>1 is implemented; compact() will then be the sole + mechanism for reclaiming space based on live-ratio thresholds. """ self._lock_refresh() pack_id = id # N=1: pack_id == chunk_id diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index aeccfd91d2..7d6290c90f 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -351,8 +351,9 @@ def test_extra_chunks(archivers, request): check_cmd_setup(archiver) cmd(archiver, "check", exit_code=0) with Repository(archiver.repository_location, exclusive=True) as repository: - chunk = fchunk(b"xxxx") - repository.put(b"01234567890123456789012345678901", chunk) + key = b"01234567890123456789012345678901" + chunk = fchunk(b"xxxx", chunk_id=key) + repository.put(key, chunk) cmd(archiver, "check", "-v", exit_code=0) # check does not deal with orphans anymore From 707f26b8e33eb20a8b88145a3e69d4ff9460042b Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 01:06:08 +0530 Subject: [PATCH 4/9] repository: address review: drop data/ namespace, fix chunk_id/pack_id semantics, refs #8572 --- src/borg/repository.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/borg/repository.py b/src/borg/repository.py index e7fb330f48..d684035313 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -122,13 +122,10 @@ def __init__( location = Location(url) self._location = location self.url = url - # lots of stuff in data: use 2 levels by default (data/00/00/ .. data/ff/ff/ dirs)! - data_levels = int(os.environ.get("BORG_STORE_DATA_LEVELS", "2")) ns_config = { "archives/": {"levels": [0]}, "cache/": {"levels": [0]}, "config/": {"levels": [0]}, - "data/": {"levels": [data_levels]}, "keys/": {"levels": [0]}, "locks/": {"levels": [0]}, "packs/": {"levels": [1]}, @@ -144,7 +141,6 @@ def __init__( "archives": "lrw", "cache": "lrwWD", # WD for chunks., last-key-checked, ... "config": "lrW", # W for manifest - "data": "lrw", "keys": "lr", "locks": "lrwD", # borg needs to create/delete a shared lock here "packs": "lrw", @@ -155,7 +151,6 @@ def __init__( "archives": "lw", "cache": "lrwWD", # read allowed, e.g. for chunks. cache "config": "lrW", # W for manifest - "data": "lw", # no r! "keys": "lr", "locks": "lrwD", # borg needs to create/delete a shared lock here "packs": "lw", # no r! @@ -359,8 +354,6 @@ def check_object(obj): log_error("invalid object magic.") elif hdr.version != OBJ_VERSION: log_error(f"unsupported object version: {hdr.version}.") - elif hdr.chunk_id != hex_to_bin(info.name): - log_error("chunk_id mismatch in header.") else: meta = blob[hdr_size : hdr_size + hdr.meta_size] if hdr.meta_size != len(meta): @@ -437,8 +430,9 @@ def check_object(obj): # add all existing objects to the index. # borg check: the index may have corrupted objects (we did not delete them) # borg check --repair: the index will only have non-corrupted objects. - pack_id = hex_to_bin(info.name) # N=1: pack_id == chunk_id - chunks[pack_id] = init_entry + pack_id = hex_to_bin(info.name) + chunk_id = pack_id # N=1: chunk_id == pack_id + chunks[chunk_id] = init_entry now = time.monotonic() if now > t_last_checkpoint + 300: # checkpoint every 5 mins t_last_checkpoint = now @@ -462,7 +456,7 @@ def check_object(obj): self, chunks, incremental=False, clear=True, force_write=True, delete_other=True ) except StoreObjectNotFound: - # it can be that there is no "data/" at all, then it crashes when iterating infos. + # it can be that there is no "packs/" at all, then it crashes when iterating infos. pass logger.info(f"Checked {objs_checked} repository objects, {objs_errors} errors.") if objs_errors == 0: @@ -491,12 +485,13 @@ def list(self, limit=None, marker=None): except StopIteration: break else: - pack_id = hex_to_bin(info.name) # N=1: pack_id == chunk_id + pack_id = hex_to_bin(info.name) + chunk_id = pack_id # N=1: chunk_id == pack_id if collect: - result.append((pack_id, info.size)) + result.append((chunk_id, info.size)) if len(result) == limit: break - elif pack_id == marker: + elif chunk_id == marker: collect = True # note: do not collect the marker id return result From 395939b551166be135f13b7df8b60696102ba358 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 01:22:30 +0530 Subject: [PATCH 5/9] archiver: accept version 4 repos for -r/--repo, refs #8572 _common.py had a hard-coded version check that only allowed v3. Now that repository.py creates v4 repos, every archiver command failed to open the repo. Extend the guard to (3, 4). The --other-repo check (v1 or v3 for borg transfer source) is intentionally left unchanged. --- src/borg/archiver/_common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/borg/archiver/_common.py b/src/borg/archiver/_common.py index 7aba725e28..452063f9d2 100644 --- a/src/borg/archiver/_common.py +++ b/src/borg/archiver/_common.py @@ -127,9 +127,9 @@ def wrapper(self, args, **kwargs): ) with repository: - if repository.version not in (3,): + if repository.version not in (3, 4): raise Error( - f"This borg version only accepts version 3 repos for -r/--repo, " + f"This borg version only accepts version 3 or 4 repos for -r/--repo, " f"but not version {repository.version}. " f"You can use 'borg transfer' to copy archives from old to new repos." ) From 2561acdba3cb2bdc2dec0f2cdfcf5389f7f427c1 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 16:52:41 +0530 Subject: [PATCH 6/9] archiver: v4-only for -r/--repo, accept v4 in --other-repo, fix TypeError in error message --- src/borg/archiver/_common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/borg/archiver/_common.py b/src/borg/archiver/_common.py index 452063f9d2..051f5454e8 100644 --- a/src/borg/archiver/_common.py +++ b/src/borg/archiver/_common.py @@ -127,9 +127,9 @@ def wrapper(self, args, **kwargs): ) with repository: - if repository.version not in (3, 4): + if repository.version not in (4,): raise Error( - f"This borg version only accepts version 3 or 4 repos for -r/--repo, " + f"This borg version only accepts version 4 repos for -r/--repo, " f"but not version {repository.version}. " f"You can use 'borg transfer' to copy archives from old to new repos." ) @@ -194,10 +194,10 @@ def wrapper(self, args, **kwargs): ) with repository: - acceptable_versions = (1,) if v1_legacy else (3,) + acceptable_versions = (1,) if v1_legacy else (3, 4) if repository.version not in acceptable_versions: raise Error( - f"This borg version only accepts version {' or '.join(acceptable_versions)} " + f"This borg version only accepts version {' or '.join(str(v) for v in acceptable_versions)} " f"repos for --other-repo." ) kwargs["other_repository"] = repository From 0a9913b658ae1dc348debf0c98a8da8705e94a5d Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 18:29:49 +0530 Subject: [PATCH 7/9] repository: drop PACK_HEADER wrapper, pack file is the raw RepoObj, refs #8572 accept only repo version 4 --- src/borg/archiver/_common.py | 2 +- src/borg/repository.py | 48 ++++++------------------------------ 2 files changed, 9 insertions(+), 41 deletions(-) diff --git a/src/borg/archiver/_common.py b/src/borg/archiver/_common.py index 051f5454e8..769be095ed 100644 --- a/src/borg/archiver/_common.py +++ b/src/borg/archiver/_common.py @@ -194,7 +194,7 @@ def wrapper(self, args, **kwargs): ) with repository: - acceptable_versions = (1,) if v1_legacy else (3, 4) + acceptable_versions = (1,) if v1_legacy else (4,) if repository.version not in acceptable_versions: raise Error( f"This borg version only accepts version {' or '.join(str(v) for v in acceptable_versions)} " diff --git a/src/borg/repository.py b/src/borg/repository.py index d684035313..db8ed214de 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -17,16 +17,9 @@ from .storelocking import Lock from .logger import create_logger from .manifest import NoManifestError -from struct import Struct - from .repoobj import RepoObj, OBJ_MAGIC, OBJ_VERSION from .crypto.key import is_keyfile -PACK_MAGIC = b"BORGPACK" -PACK_VERSION = 0x01 -_pack_header = Struct("<8sBI") # magic(8) + version(1) + blob_len(4) -PACK_HEADER_SIZE = _pack_header.size # 13 bytes - logger = create_logger(__name__) @@ -331,34 +324,20 @@ def log_error(msg): def check_object(obj): """Check if obj looks valid.""" - if len(obj) < PACK_HEADER_SIZE: - log_error("too small.") - return - magic, version, blob_len = _pack_header.unpack(obj[:PACK_HEADER_SIZE]) - if magic != PACK_MAGIC: - log_error("invalid pack magic.") - return - if version != PACK_VERSION: - log_error(f"unsupported pack version: {version}.") - return - blob = obj[PACK_HEADER_SIZE:] - if len(blob) != blob_len: - log_error(f"pack blob_len mismatch: header says {blob_len}, actual {len(blob)}.") - return hdr_size = RepoObj.obj_header.size - if len(blob) < hdr_size: + if len(obj) < hdr_size: log_error("too small.") return - hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(blob[:hdr_size])) + hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(obj[:hdr_size])) if hdr.magic != OBJ_MAGIC: log_error("invalid object magic.") elif hdr.version != OBJ_VERSION: log_error(f"unsupported object version: {hdr.version}.") else: - meta = blob[hdr_size : hdr_size + hdr.meta_size] + meta = obj[hdr_size : hdr_size + hdr.meta_size] if hdr.meta_size != len(meta): log_error("metadata size mismatch.") - data = blob[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] + data = obj[hdr_size + hdr.meta_size : hdr_size + hdr.meta_size + hdr.data_size] if hdr.data_size != len(data): log_error("data size mismatch.") @@ -503,15 +482,13 @@ def get(self, id, read_data=True, raise_missing=True): key = "packs/" + bin_to_hex(pack_id) try: if read_data: - raw = self.store.load(key) - return raw[PACK_HEADER_SIZE:] + return self.store.load(key) else: # RepoObj layout supports separately encrypted metadata and data. # We return enough bytes so the client can decrypt the metadata. hdr_size = RepoObj.obj_header.size extra_size = 1024 - hdr_size # load a bit more, 1024b, reduces round trips - raw = self.store.load(key, size=PACK_HEADER_SIZE + hdr_size + extra_size) - obj = raw[PACK_HEADER_SIZE:] + obj = self.store.load(key, size=hdr_size + extra_size) hdr = obj[0:hdr_size] if len(hdr) != hdr_size: raise IntegrityError(f"Object too small [id {id_hex}]: expected {hdr_size}, got {len(hdr)} bytes") @@ -519,8 +496,7 @@ def get(self, id, read_data=True, raise_missing=True): if meta_size > extra_size: # we did not get enough, need to load more, but not all. # this should be rare, as chunk metadata is rather small usually. - raw = self.store.load(key, size=PACK_HEADER_SIZE + hdr_size + meta_size) - obj = raw[PACK_HEADER_SIZE:] + obj = self.store.load(key, size=hdr_size + meta_size) meta = obj[hdr_size : hdr_size + meta_size] if len(meta) != meta_size: raise IntegrityError(f"Object too small [id {id_hex}]: expected {meta_size}, got {len(meta)} bytes") @@ -548,21 +524,13 @@ def put(self, id, data, wait=True): pack_id = id # N=1: pack_id == chunk_id key = "packs/" + bin_to_hex(pack_id) - pack_hdr = _pack_header.pack(PACK_MAGIC, PACK_VERSION, data_size) - self.store.store(key, pack_hdr + data) + self.store.store(key, data) def delete(self, id, wait=True): """delete a repo object Note: when doing calls with wait=False this gets async and caller must deal with async results / exceptions later. - - N=1: pack_id == chunk_id, so deleting the pack file is equivalent to - deleting the chunk. Hard delete is safe here. - N>1: a pack contains multiple chunks. Individual chunks cannot be deleted - from a pack without rewriting it. This method must become a soft-delete - (no-op) before N>1 is implemented; compact() will then be the sole - mechanism for reclaiming space based on live-ratio thresholds. """ self._lock_refresh() pack_id = id # N=1: pack_id == chunk_id From a4eac0b62c189d72ea9a4a46340ab2e68df54ea3 Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 23:56:26 +0530 Subject: [PATCH 8/9] testsuite: move manifest corruption offset into data_encrypted region, refs #8572 Corruption at offset 123 lands inside meta_encrypted (header is 49 bytes), causing extract_crypted_data to return a shifted slice whose first byte is a random AES-OCB ciphertext byte. When that byte equals 0x02 (PlaintextKey type) key detection silently selects the wrong key, leading to a flaky IntegrityError in rebuild_archives. Move the insertion point to offset 250, which is safely inside data_encrypted for any realistic manifest size, so key detection always reads the correct type byte and the corruption is caught by AEAD authentication instead. --- src/borg/testsuite/archiver/check_cmd_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/borg/testsuite/archiver/check_cmd_test.py b/src/borg/testsuite/archiver/check_cmd_test.py index 7d6290c90f..162b4c1986 100644 --- a/src/borg/testsuite/archiver/check_cmd_test.py +++ b/src/borg/testsuite/archiver/check_cmd_test.py @@ -225,7 +225,7 @@ def test_corrupted_manifest(archivers, request): archive, repository = open_archive(archiver.repository_path, "archive1") with repository: manifest = repository.get_manifest() - corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:] + corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:] repository.put_manifest(corrupted_manifest) cmd(archiver, "check", exit_code=1) output = cmd(archiver, "check", "-v", "--repair", exit_code=0) @@ -273,7 +273,7 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request): archive, repository = open_archive(archiver.repository_path, "archive1") with repository: manifest = repository.get_manifest() - corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:] + corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:] repository.put_manifest(corrupted_manifest) chunk = repository.get(archive.id) corrupted_chunk = chunk + b"corrupted!" @@ -312,7 +312,7 @@ def test_spoofed_archive(archivers, request): with repository: # attacker would corrupt or delete the manifest to trigger a rebuild of it: manifest = repository.get_manifest() - corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:] + corrupted_manifest = manifest[:250] + b"corrupted!" + manifest[250:] repository.put_manifest(corrupted_manifest) archive_dict = { "command_line": "", From d8564b90a4d99fb212aa125c0b798a759e52521e Mon Sep 17 00:00:00 2001 From: Mrityunjay Raj Date: Mon, 1 Jun 2026 23:56:40 +0530 Subject: [PATCH 9/9] repository: make N=1 chunk size assumption explicit in list(), refs #8572 info.size is the on-disk pack file size, which equals the chunk size only when N=1 (one chunk per pack). Extract it into a named variable with a comment so the assumption is visible and easy to fix when N>1 is introduced. --- src/borg/repository.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/borg/repository.py b/src/borg/repository.py index db8ed214de..bba937e336 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -467,7 +467,8 @@ def list(self, limit=None, marker=None): pack_id = hex_to_bin(info.name) chunk_id = pack_id # N=1: chunk_id == pack_id if collect: - result.append((chunk_id, info.size)) + chunk_size = info.size # only correct for N=1 + result.append((chunk_id, chunk_size)) if len(result) == limit: break elif chunk_id == marker: