@@ -239,7 +239,6 @@ def _transfer_bg_thread(
239239 error_queue ,
240240 ):
241241 """Transfer image to storage in the background
242-
243242 Args:
244243 src_path: Path to the source image
245244 src_operator: Operator to read the source image
@@ -251,35 +250,30 @@ def _transfer_bg_thread(
251250 try :
252251 filename = Path (src_path ).name if isinstance (src_path , (str , os .PathLike )) else src_path .name
253252
254- # if the source is a local file, we can hash it and compare to the known hash
255253 if src_operator_scheme == "fs" :
256254 file_hash = self ._sha256_file (src_operator , src_path )
257- if to_storage .exists (filename ) and to_storage .hash (filename ) == file_hash :
258- self .logger .info (f"Image { filename } already exists in storage with matching hash, skipping" )
259- return
255+ self .logger .info (f"Hash of { filename } is { file_hash } " )
256+ else :
257+ file_hash = known_hash
258+ self .logger .info (f"Using provided hash for { filename } : { known_hash } " )
260259
261- # if the source is a remote file, we can't hash it, so we need to check the hash from the metadata
262- elif known_hash and to_storage .exists (filename ):
263- self .logger .info (f"Image { filename } already exists in storage, checking hash" )
264- if to_storage .hash (filename ) == known_hash :
265- self .logger .info (f"Image { filename } already exists in storage with matching hash, skipping" )
266- return
260+ if file_hash and to_storage .exists (filename ):
261+ to_storage_hash = to_storage .hash (filename )
262+ self .logger .info (f"Hash of existing file in storage: { to_storage_hash } " )
267263
268- # last attempt to check if the image in storage matches the known metadata
269- metadata , metadata_json = self ._create_metadata_and_json (src_operator , src_path )
270- metadata_file = filename + ".metadata"
271- self .logger .info (f"Metadata: { metadata_json } " )
272- if to_storage .exists (metadata_file ) and to_storage .exists (filename ):
273- self .logger .info (f"Checking metadata for file in exporter storage { metadata_file } " )
274- data = to_storage .read_bytes (metadata_file ).decode (errors = "ignore" )
275- if to_storage .stat (filename ).content_length == metadata .content_length and data == metadata_json :
276- self .logger .info (f"Image { filename } already exists in storage with matching metadata, skipping" )
264+ if to_storage_hash == file_hash :
265+ self .logger .info (f"Image { filename } already exists in storage with matching hash, skipping" )
277266 return
267+ else :
268+ self .logger .info (f"Image { filename } exists in storage but hash differs, will overwrite" )
278269
279- # ok, we need to write the image to storage
270+ self . logger . info ( f"Uploading image to storage: { filename } " )
280271 to_storage .write_from_path (filename , src_path , src_operator )
281- # but also write the metadata to be able to check for matching images later
272+
273+ metadata , metadata_json = self ._create_metadata_and_json (src_operator , src_path , file_hash )
274+ metadata_file = filename + ".metadata"
282275 to_storage .write_bytes (metadata_file , metadata_json .encode (errors = "ignore" ))
276+
283277 self .logger .info (f"Image written to storage: { filename } " )
284278
285279 except Exception as e :
@@ -298,16 +292,19 @@ def _sha256_file(self, src_operator, src_path) -> str:
298292
299293 return m .hexdigest ()
300294
301- def _create_metadata_and_json (self , src_operator , src_path ) -> tuple [Metadata , str ]:
295+ def _create_metadata_and_json (self , src_operator , src_path , file_hash = None ) -> tuple [Metadata , str ]:
302296 """Create a metadata json string from a metadata object"""
303297 metadata = src_operator .stat (src_path )
304- return metadata , json .dumps (
305- {
306- "path" : str (src_path ),
307- "content_length" : metadata .content_length ,
308- "etag" : metadata .etag ,
309- }
310- )
298+ metadata_dict = {
299+ "path" : str (src_path ),
300+ "content_length" : metadata .content_length ,
301+ "etag" : metadata .etag ,
302+ }
303+
304+ if file_hash :
305+ metadata_dict ["hash" ] = file_hash
306+
307+ return metadata , json .dumps (metadata_dict )
311308
312309 def _lookup_block_device (self , console , prompt , address : str ) -> str :
313310 """Lookup block device for a given address.
0 commit comments