From f86f8d7eecf2acd692e81624d25308bf21cc0fa6 Mon Sep 17 00:00:00 2001 From: "GPT 5.5" Date: Tue, 28 Apr 2026 08:29:37 +0800 Subject: [PATCH] NEEDS REVIEW AND COMMIT-SPLIT/changelog handling --- gix-object/src/traits/_impls.rs | 57 ++++++++++++++++++++ gix-object/src/traits/mod.rs | 27 ++++++++++ gix-odb/src/cache.rs | 19 +++++++ gix-odb/src/memory.rs | 32 +++++++++++ gix-odb/src/store_impls/dynamic/write.rs | 43 +++++++++++++++ gix-odb/src/store_impls/loose/write.rs | 67 ++++++++++++++++++++++-- gix-odb/tests/odb/store/loose.rs | 13 +++++ gix/src/repository/impls.rs | 2 +- gix/src/repository/object.rs | 6 +-- 9 files changed, 257 insertions(+), 9 deletions(-) diff --git a/gix-object/src/traits/_impls.rs b/gix-object/src/traits/_impls.rs index 385e62363ea..8d6bf2e2a82 100644 --- a/gix-object/src/traits/_impls.rs +++ b/gix-object/src/traits/_impls.rs @@ -16,9 +16,28 @@ where (*self).write_buf(object, from) } + fn write_buf_with_known_id( + &self, + id: ObjectId, + object: Kind, + from: &[u8], + ) -> Result { + (*self).write_buf_with_known_id(id, object, from) + } + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { (*self).write_stream(kind, size, from) } + + fn write_stream_with_known_id( + &self, + id: ObjectId, + kind: Kind, + size: u64, + from: &mut dyn Read, + ) -> Result { + (*self).write_stream_with_known_id(id, kind, size, from) + } } impl crate::Write for Arc @@ -33,9 +52,28 @@ where self.deref().write_buf(object, from) } + fn write_buf_with_known_id( + &self, + id: ObjectId, + object: Kind, + from: &[u8], + ) -> Result { + self.deref().write_buf_with_known_id(id, object, from) + } + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { self.deref().write_stream(kind, size, from) } + + fn write_stream_with_known_id( + &self, + id: ObjectId, + kind: Kind, + size: u64, + from: &mut dyn Read, + ) -> Result { + self.deref().write_stream_with_known_id(id, kind, size, from) + } } impl crate::Write for Rc @@ -50,9 +88,28 @@ where self.deref().write_buf(object, from) } + fn write_buf_with_known_id( + &self, + id: ObjectId, + object: Kind, + from: &[u8], + ) -> Result { + self.deref().write_buf_with_known_id(id, object, from) + } + fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { self.deref().write_stream(kind, size, from) } + + fn write_stream_with_known_id( + &self, + id: ObjectId, + kind: Kind, + size: u64, + from: &mut dyn Read, + ) -> Result { + self.deref().write_stream_with_known_id(id, kind, size, from) + } } impl WriteTo for &T diff --git a/gix-object/src/traits/mod.rs b/gix-object/src/traits/mod.rs index f0078fd9345..95379bb6cab 100644 --- a/gix-object/src/traits/mod.rs +++ b/gix-object/src/traits/mod.rs @@ -15,6 +15,19 @@ pub trait Write { fn write_buf(&self, object: crate::Kind, mut from: &[u8]) -> Result { self.write_stream(object, from.len() as u64, &mut from) } + /// As [`write_buf`](Write::write_buf), but the object id has already been computed by the caller. + /// + /// Implementations may trust the given `id` and avoid computing it again. Callers must make sure `id` matches + /// the provided `object` and `from` bytes. + fn write_buf_with_known_id( + &self, + id: gix_hash::ObjectId, + object: crate::Kind, + from: &[u8], + ) -> Result { + let _ = id; + self.write_buf(object, from) + } /// As [`write`](Write::write), but takes an input stream. /// This is commonly used for writing blobs directly without reading them to memory first. fn write_stream( @@ -23,6 +36,20 @@ pub trait Write { size: u64, from: &mut dyn io::Read, ) -> Result; + /// As [`write_stream`](Write::write_stream), but the object id has already been computed by the caller. + /// + /// Implementations may trust the given `id` and avoid computing it again. Callers must make sure `id` matches + /// the provided `kind`, `size` and stream contents. + fn write_stream_with_known_id( + &self, + id: gix_hash::ObjectId, + kind: crate::Kind, + size: u64, + from: &mut dyn io::Read, + ) -> Result { + let _ = id; + self.write_stream(kind, size, from) + } } /// Writing of objects to a `Write` implementation diff --git a/gix-odb/src/cache.rs b/gix-odb/src/cache.rs index 87523f5e4c9..36d57b83ff5 100644 --- a/gix-odb/src/cache.rs +++ b/gix-odb/src/cache.rs @@ -153,6 +153,25 @@ mod impls { ) -> Result { self.inner.write_stream(kind, size, from) } + + fn write_buf_with_known_id( + &self, + id: ObjectId, + kind: Kind, + from: &[u8], + ) -> Result { + self.inner.write_buf_with_known_id(id, kind, from) + } + + fn write_stream_with_known_id( + &self, + id: ObjectId, + kind: Kind, + size: u64, + from: &mut dyn Read, + ) -> Result { + self.inner.write_stream_with_known_id(id, kind, size, from) + } } impl gix_object::Find for Cache diff --git a/gix-odb/src/memory.rs b/gix-odb/src/memory.rs index ba5b4ef9faf..5abf8bebcfd 100644 --- a/gix-odb/src/memory.rs +++ b/gix-odb/src/memory.rs @@ -227,6 +227,38 @@ where map.borrow_mut().insert(id, (kind, buf)); Ok(id) } + + fn write_buf_with_known_id( + &self, + id: gix_hash::ObjectId, + kind: gix_object::Kind, + from: &[u8], + ) -> Result { + let Some(map) = self.memory.as_ref() else { + return self.inner.write_buf_with_known_id(id, kind, from); + }; + + map.borrow_mut().insert(id, (kind, from.to_owned())); + Ok(id) + } + + fn write_stream_with_known_id( + &self, + id: gix_hash::ObjectId, + kind: gix_object::Kind, + size: u64, + from: &mut dyn std::io::Read, + ) -> Result { + let Some(map) = self.memory.as_ref() else { + return self.inner.write_stream_with_known_id(id, kind, size, from); + }; + + let mut buf = Vec::new(); + from.read_to_end(&mut buf)?; + + map.borrow_mut().insert(id, (kind, buf)); + Ok(id) + } } impl Deref for Proxy { diff --git a/gix-odb/src/store_impls/dynamic/write.rs b/gix-odb/src/store_impls/dynamic/write.rs index d0b8735121f..e09586038f0 100644 --- a/gix-odb/src/store_impls/dynamic/write.rs +++ b/gix-odb/src/store_impls/dynamic/write.rs @@ -43,4 +43,47 @@ where } }) } + + fn write_buf_with_known_id( + &self, + id: ObjectId, + kind: Kind, + from: &[u8], + ) -> Result { + let mut snapshot = self.snapshot.borrow_mut(); + Ok(match snapshot.loose_dbs.first() { + Some(ldb) => ldb.write_buf_with_known_id(id, kind, from)?, + None => { + let new_snapshot = self + .store + .load_one_index(self.refresh, snapshot.marker) + .map_err(Box::new)? + .expect("there is always at least one ODB, and this code runs only once for initialization"); + *snapshot = new_snapshot; + snapshot.loose_dbs[0].write_buf_with_known_id(id, kind, from)? + } + }) + } + + fn write_stream_with_known_id( + &self, + id: ObjectId, + kind: Kind, + size: u64, + from: &mut dyn Read, + ) -> Result { + let mut snapshot = self.snapshot.borrow_mut(); + Ok(match snapshot.loose_dbs.first() { + Some(ldb) => ldb.write_stream_with_known_id(id, kind, size, from)?, + None => { + let new_snapshot = self + .store + .load_one_index(self.refresh, snapshot.marker) + .map_err(Box::new)? + .expect("there is always at least one ODB, and this code runs only once for initialization"); + *snapshot = new_snapshot; + snapshot.loose_dbs[0].write_stream_with_known_id(id, kind, size, from)? + } + }) + } } diff --git a/gix-odb/src/store_impls/loose/write.rs b/gix-odb/src/store_impls/loose/write.rs index d94c16556bd..49afc866f19 100644 --- a/gix-odb/src/store_impls/loose/write.rs +++ b/gix-odb/src/store_impls/loose/write.rs @@ -64,6 +64,29 @@ impl gix_object::Write for Store { Ok(self.finalize_object(to)?) } + fn write_buf_with_known_id( + &self, + id: gix_hash::ObjectId, + kind: gix_object::Kind, + from: &[u8], + ) -> Result { + let mut to = self.compressed_tempfile().map_err(Box::new)?; + to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) + .map_err(|err| Error::Io { + source: err.into(), + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + to.write_all(from).map_err(|err| Error::Io { + source: err.into(), + message: "stream all data into tempfile in", + path: self.path.to_owned(), + })?; + to.flush()?; + Ok(self.finalize_object_at(id.as_ref(), to)?) + } + /// Write the given stream in `from` to disk with at least one syscall. /// /// This will cost at least 4 IO operations. @@ -91,6 +114,32 @@ impl gix_object::Write for Store { to.flush().map_err(Box::new)?; Ok(self.finalize_object(to)?) } + + fn write_stream_with_known_id( + &self, + id: gix_hash::ObjectId, + kind: gix_object::Kind, + size: u64, + mut from: &mut dyn io::Read, + ) -> Result { + let mut to = self.compressed_tempfile().map_err(Box::new)?; + to.write_all(&gix_object::encode::loose_header(kind, size)) + .map_err(|err| Error::Io { + source: err.into(), + message: "write header to tempfile in", + path: self.path.to_owned(), + })?; + + io::copy(&mut from, &mut to) + .map_err(|err| Error::Io { + source: err.into(), + message: "stream all data into tempfile in", + path: self.path.to_owned(), + }) + .map_err(Box::new)?; + to.flush().map_err(Box::new)?; + Ok(self.finalize_object_at(id.as_ref(), to)?) + } } type CompressedTempfile = deflate::Write; @@ -107,6 +156,10 @@ impl Store { impl Store { fn dest(&self) -> Result, Error> { + Ok(gix_hash::io::Write::new(self.compressed_tempfile()?, self.object_hash)) + } + + fn compressed_tempfile(&self) -> Result { #[cfg_attr(not(unix), allow(unused_mut))] let mut builder = tempfile::Builder::new(); #[cfg(unix)] @@ -115,14 +168,13 @@ impl Store { let perms = std::fs::Permissions::from_mode(0o444); builder.permissions(perms); } - Ok(gix_hash::io::Write::new( - deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| Error::Io { + Ok(deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| { + Error::Io { source: err.into(), message: "create named temp file in", path: self.path.to_owned(), - })?), - self.object_hash, - )) + } + })?)) } fn finalize_object( @@ -134,6 +186,11 @@ impl Store { message: "hash tempfile in", path: self.path.to_owned(), })?; + self.finalize_object_at(&id, file) + } + + fn finalize_object_at(&self, id: &gix_hash::oid, file: CompressedTempfile) -> Result { + let id = id.to_owned(); let object_path = loose::hash_path(&id, self.path.clone()); let object_dir = object_path .parent() diff --git a/gix-odb/tests/odb/store/loose.rs b/gix-odb/tests/odb/store/loose.rs index 391ca70b5ee..608c8d8bd62 100644 --- a/gix-odb/tests/odb/store/loose.rs +++ b/gix-odb/tests/odb/store/loose.rs @@ -73,6 +73,19 @@ mod write { db.try_find(&oid, &mut buf2)?.expect("id present").decode()?, obj.decode()? ); + let actual = db.write_buf_with_known_id(oid, obj.kind, obj.data)?; + assert_eq!(actual, oid); + assert_eq!( + db.try_find(&oid, &mut buf2)?.expect("id present").decode()?, + obj.decode()? + ); + let mut from = obj.data; + let actual = db.write_stream_with_known_id(oid, obj.kind, obj.data.len() as u64, &mut from)?; + assert_eq!(actual, oid); + assert_eq!( + db.try_find(&oid, &mut buf2)?.expect("id present").decode()?, + obj.decode()? + ); } Ok(()) } diff --git a/gix/src/repository/impls.rs b/gix/src/repository/impls.rs index e8a25403159..7558dad1247 100644 --- a/gix/src/repository/impls.rs +++ b/gix/src/repository/impls.rs @@ -111,7 +111,7 @@ impl gix_object::Write for crate::Repository { if self.objects.exists(&oid) { return Ok(oid); } - self.objects.write_buf(object, from) + self.objects.write_buf_with_known_id(oid, object, from) } fn write_stream( diff --git a/gix/src/repository/object.rs b/gix/src/repository/object.rs index ff4df5b0429..6245295fcf0 100644 --- a/gix/src/repository/object.rs +++ b/gix/src/repository/object.rs @@ -265,7 +265,7 @@ impl crate::Repository { } self.objects - .write_buf(kind, buf) + .write_buf_with_known_id(oid, kind, buf) .map(|oid| oid.attach(self)) .map_err(Into::into) } @@ -295,7 +295,7 @@ impl crate::Repository { return Ok(oid.attach(self)); } self.objects - .write_buf(gix_object::Kind::Blob, bytes) + .write_buf_with_known_id(oid, gix_object::Kind::Blob, bytes) .map_err(Into::into) .map(|oid| oid.attach(self)) } @@ -322,7 +322,7 @@ impl crate::Repository { } self.objects - .write_buf(gix_object::Kind::Blob, buf) + .write_buf_with_known_id(oid, gix_object::Kind::Blob, buf) .map_err(Into::into) .map(|oid| oid.attach(self)) }