Skip to content

Commit e4a8c93

Browse files
authored
Merge pull request #36 from bnavigator/metadata-native-zips
Native Uploader: Handle updating the metadata of native zip files that are not to be unpacked.
2 parents 1af85a7 + 7ab1e16 commit e4a8c93

4 files changed

Lines changed: 84 additions & 2 deletions

File tree

dvuploader/file.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class File(BaseModel):
3030
Private Attributes:
3131
_size (int): Size of the file in bytes.
3232
_unchanged_data (bool): Indicates if the file data has not changed since last upload.
33+
_enforce_metadata_update (bool): Indicates if metadata update is enforced.
34+
_is_inside_zip (bool): Indicates if the file is packaged inside a zip archive.
3335
3436
Methods:
3537
extract_file_name(): Extracts filename from filepath and initializes file handler.
@@ -59,6 +61,8 @@ class File(BaseModel):
5961

6062
_size: int = PrivateAttr(default=0)
6163
_unchanged_data: bool = PrivateAttr(default=False)
64+
_enforce_metadata_update: bool = PrivateAttr(default=False)
65+
_is_inside_zip: bool = PrivateAttr(default=False)
6266

6367
def extract_file_name(self):
6468
"""

dvuploader/nativeupload.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ async def native_upload(
9595
files_new_metadata = [file for file in files if file.to_replace and file._unchanged_data]
9696
files_replace = [file for file in files if file.to_replace and not file._unchanged_data]
9797

98+
# These are not in a package but need a metadata update; enforce it even for zips
99+
for file in files_new_metadata:
100+
file._enforce_metadata_update = True
101+
98102
async with httpx.AsyncClient(**session_params) as session:
99103
with tempfile.TemporaryDirectory() as tmp_dir:
100104
packages = distribute_files(files_new)
@@ -369,8 +373,11 @@ async def _update_metadata(
369373
try:
370374
if _tab_extension(dv_path) in file_mapping:
371375
file_id = file_mapping[_tab_extension(dv_path)]
372-
elif file.file_name and _is_zip(file.file_name):
373-
# When the file is a zip it will be unpacked and thus
376+
elif (
377+
file.file_name and _is_zip(file.file_name)
378+
and not file._is_inside_zip and not file._enforce_metadata_update
379+
):
380+
# When the file is a zip package it will be unpacked and thus
374381
# the expected file name of the zip will not be in the
375382
# dataset, since it has been unpacked.
376383
continue

dvuploader/packaging.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def zip_files(
9898
data=file.handler.read(), # type: ignore
9999
zinfo_or_arcname=_create_arcname(file),
100100
)
101+
file._is_inside_zip = True
101102

102103
return path
103104

tests/integration/test_native_upload.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,76 @@ def test_zipzip_file_upload(
460460

461461
assert sorted([file["label"] for file in files]) == sorted(expected_files)
462462

463+
def test_metadata_with_zip_files_in_package(self, credentials):
464+
BASE_URL, API_TOKEN = credentials
465+
466+
# Create Dataset
467+
pid = create_dataset(
468+
parent="Root",
469+
server_url=BASE_URL,
470+
api_token=API_TOKEN,
471+
)
472+
473+
# Arrange
474+
files = [
475+
File(filepath="tests/fixtures/archive.zip",
476+
dv_dir="subdir2",
477+
description="This file should not be unzipped",
478+
categories=["Test file"]
479+
),
480+
File(filepath="tests/fixtures/add_dir_files/somefile.txt",
481+
dv_dir="subdir",
482+
description="A simple text file",
483+
categories=["Test file"]
484+
),
485+
]
486+
487+
# Act
488+
uploader = DVUploader(files=files)
489+
uploader.upload(
490+
persistent_id=pid,
491+
api_token=API_TOKEN,
492+
dataverse_url=BASE_URL,
493+
n_parallel_uploads=10,
494+
)
495+
496+
# Assert
497+
files = retrieve_dataset_files(
498+
dataverse_url=BASE_URL,
499+
persistent_id=pid,
500+
api_token=API_TOKEN,
501+
)
502+
503+
assert len(files) == 2, f"Expected 2 files, got {len(files)}"
504+
505+
expected_files = [
506+
{
507+
"label": "archive.zip",
508+
"description": "This file should not be unzipped",
509+
"categories": ["Test file"]
510+
},
511+
{
512+
"label": "somefile.txt",
513+
"description": "A simple text file",
514+
"categories": ["Test file"]
515+
},
516+
]
517+
518+
files_as_expected = sorted(
519+
[
520+
{
521+
k: (f[k] if k in f else None)
522+
for k in expected_files[0].keys()
523+
}
524+
for f in files
525+
],
526+
key=lambda x: x["label"]
527+
)
528+
assert files_as_expected == expected_files, (
529+
f"File metadata not as expected: {json.dumps(files, indent=2)}"
530+
)
531+
532+
463533
def test_too_many_zip_files(
464534
self,
465535
credentials,

0 commit comments

Comments
 (0)