Skip to content

Commit d18b6f7

Browse files
authored
Merge pull request #46 from gdcc/31-is-tab_ingest-setting-missing-in-_get_json_data
Pass `tabIngest` upon native upload
2 parents e4a8c93 + e41df5f commit d18b6f7

5 files changed

Lines changed: 162 additions & 40 deletions

File tree

dvuploader/dvuploader.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,15 @@ def _check_duplicates(
249249
file._unchanged_data = self._check_hashes(file, ds_file)
250250
if file._unchanged_data:
251251
table.add_row(
252-
file.file_name, "[bright_cyan]Exists", "[bright_black]Replace Meta"
252+
file.file_name,
253+
"[bright_cyan]Exists",
254+
"[bright_black]Replace Meta",
253255
)
254256
else:
255257
table.add_row(
256-
file.file_name, "[bright_cyan]Exists", "[bright_black]Replace"
258+
file.file_name,
259+
"[bright_cyan]Exists",
260+
"[bright_black]Replace",
257261
)
258262
else:
259263
table.add_row(
@@ -302,7 +306,15 @@ def _get_file_id(
302306
# Find the file that matches label and directory_label
303307
for ds_file in ds_files:
304308
dspath = os.path.join(ds_file.get("directoryLabel", ""), ds_file["label"])
305-
fpath = os.path.join(file.directory_label, file.file_name) # type: ignore
309+
310+
if file.directory_label:
311+
fpath = os.path.join(file.directory_label, file.file_name) # type: ignore
312+
elif file.file_name:
313+
fpath = file.file_name
314+
else:
315+
raise ValueError(
316+
f"File {file.file_name} has no directory label or file name."
317+
)
306318

307319
if dspath == fpath:
308320
return ds_file["dataFile"]["id"]

dvuploader/file.py

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,20 +44,72 @@ class File(BaseModel):
4444
arbitrary_types_allowed=True,
4545
)
4646

47-
filepath: str = Field(..., exclude=True)
48-
handler: Union[BytesIO, StringIO, IO, None] = Field(default=None, exclude=True)
49-
description: str = ""
50-
directory_label: str = Field(default="", alias="directoryLabel")
51-
mimeType: str = "application/octet-stream"
52-
categories: Optional[List[str]] = ["DATA"]
53-
restrict: bool = False
54-
checksum_type: ChecksumTypes = Field(default=ChecksumTypes.MD5, exclude=True)
55-
storageIdentifier: Optional[str] = None
56-
file_name: Optional[str] = Field(default=None, alias="fileName")
57-
checksum: Optional[Checksum] = None
58-
to_replace: bool = False
59-
file_id: Optional[Union[str, int]] = Field(default=None, alias="fileToReplaceId")
60-
tab_ingest: bool = Field(default=True, alias="tabIngest")
47+
filepath: str = Field(
48+
...,
49+
exclude=True,
50+
description="The path to the file",
51+
)
52+
handler: Union[BytesIO, StringIO, IO, None] = Field(
53+
default=None,
54+
exclude=True,
55+
description="File handler for reading the file contents",
56+
)
57+
description: Optional[str] = Field(
58+
default=None,
59+
alias="description",
60+
description="The description of the file",
61+
)
62+
directory_label: Optional[str] = Field(
63+
default=None,
64+
alias="directoryLabel",
65+
description="The label of the directory where the file is stored",
66+
)
67+
mimeType: str = Field(
68+
default="application/octet-stream",
69+
description="The MIME type of the file",
70+
)
71+
categories: Optional[List[str]] = Field(
72+
default=["DATA"],
73+
alias="categories",
74+
description="The categories associated with the file",
75+
)
76+
restrict: bool = Field(
77+
default=False,
78+
alias="restrict",
79+
description="Indicates if the file is restricted",
80+
)
81+
checksum_type: ChecksumTypes = Field(
82+
default=ChecksumTypes.MD5,
83+
exclude=True,
84+
description="The type of checksum used for the file",
85+
)
86+
storageIdentifier: Optional[str] = Field(
87+
default=None,
88+
description="The identifier of the storage where the file is stored",
89+
)
90+
file_name: Optional[str] = Field(
91+
default=None,
92+
alias="fileName",
93+
description="The name of the file",
94+
)
95+
checksum: Optional[Checksum] = Field(
96+
default=None,
97+
description="The checksum of the file",
98+
)
99+
file_id: Optional[Union[str, int]] = Field(
100+
default=None,
101+
alias="fileToReplaceId",
102+
description="The ID of the file to replace",
103+
)
104+
tab_ingest: bool = Field(
105+
default=True,
106+
alias="tabIngest",
107+
description="Indicates if tabular ingest should be performed",
108+
)
109+
to_replace: bool = Field(
110+
default=False,
111+
description="Indicates if the file should be replaced",
112+
)
61113

62114
_size: int = PrivateAttr(default=0)
63115
_unchanged_data: bool = PrivateAttr(default=False)
@@ -126,7 +178,6 @@ def apply_checksum(self):
126178

127179
self.checksum.apply_checksum()
128180

129-
130181
def update_checksum_chunked(self, blocksize=2**20):
131182
"""Updates the checksum with data read from a file-like object in chunks.
132183
@@ -155,7 +206,6 @@ def update_checksum_chunked(self, blocksize=2**20):
155206

156207
self.handler.seek(0)
157208

158-
159209
def __del__(self):
160210
if self.handler is not None:
161211
self.handler.close()

dvuploader/nativeupload.py

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,12 @@ async def native_upload(
9292
}
9393

9494
files_new = [file for file in files if not file.to_replace]
95-
files_new_metadata = [file for file in files if file.to_replace and file._unchanged_data]
96-
files_replace = [file for file in files if file.to_replace and not file._unchanged_data]
95+
files_new_metadata = [
96+
file for file in files if file.to_replace and file._unchanged_data
97+
]
98+
files_replace = [
99+
file for file in files if file.to_replace and not file._unchanged_data
100+
]
97101

98102
# These are not in a package but need a metadata update, ensure even for zips
99103
for file in files_new_metadata:
@@ -114,7 +118,7 @@ async def native_upload(
114118
file.file_name, # type: ignore
115119
total=file._size,
116120
),
117-
file
121+
file,
118122
)
119123
for file in files_replace
120124
]
@@ -325,17 +329,13 @@ def _get_json_data(file: File) -> Dict:
325329
Dict: Dictionary containing file metadata for the upload request.
326330
"""
327331

328-
metadata = {
329-
"description": file.description,
330-
"categories": file.categories,
331-
"restrict": file.restrict,
332-
"forceReplace": True,
332+
include = {
333+
"description",
334+
"categories",
335+
"restrict",
336+
"tabIngest",
333337
}
334-
335-
if file.directory_label:
336-
metadata["directoryLabel"] = file.directory_label
337-
338-
return metadata
338+
return file.model_dump(by_alias=True, exclude_none=True, include=include)
339339

340340

341341
async def _update_metadata(
@@ -368,14 +368,23 @@ async def _update_metadata(
368368
tasks = []
369369

370370
for file in files:
371-
dv_path = os.path.join(file.directory_label, file.file_name) # type: ignore
371+
if file.directory_label:
372+
dv_path = os.path.join(file.directory_label, file.file_name) # type: ignore
373+
elif file.file_name:
374+
dv_path = file.file_name
375+
else:
376+
raise ValueError(
377+
f"File {file.file_name} has no directory label or file name."
378+
)
372379

373380
try:
374381
if _tab_extension(dv_path) in file_mapping:
375382
file_id = file_mapping[_tab_extension(dv_path)]
376383
elif (
377-
file.file_name and _is_zip(file.file_name)
378-
and not file._is_inside_zip and not file._enforce_metadata_update
384+
file.file_name
385+
and _is_zip(file.file_name)
386+
and not file._is_inside_zip
387+
and not file._enforce_metadata_update
379388
):
380389
# When the file is a zip package it will be unpacked and thus
381390
# the expected file name of the zip will not be in the
@@ -426,8 +435,6 @@ async def _update_single_metadata(
426435

427436
json_data = _get_json_data(file)
428437

429-
del json_data["forceReplace"]
430-
431438
# Send metadata as a readable byte stream
432439
# This is a workaround since "data" and "json"
433440
# does not work

tests/unit/test_cli.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,15 @@ def test_full_input(self):
2626
assert cli_input.dataverse_url == "https://demo.dataverse.org/"
2727
assert cli_input.persistent_id == "doi:10.70122/XXX/XXXXX"
2828

29+
actual_files = []
30+
for file in cli_input.files:
31+
if file.directory_label:
32+
actual_files.append((file.directory_label, file.file_name))
33+
else:
34+
actual_files.append(("", file.file_name))
35+
2936
assert len(cli_input.files) == 2
30-
assert sorted(
31-
[(file.directory_label, file.file_name) for file in cli_input.files]
32-
) == sorted(expected_files)
37+
assert sorted(actual_files) == sorted(expected_files)
3338

3439

3540
class TestCLIMain:

tests/unit/test_directupload.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from dvuploader.directupload import (
55
_add_files_to_ds,
66
_validate_ticket_response,
7+
_prepare_registration,
78
)
89

910
from dvuploader.file import File
@@ -128,3 +129,50 @@ def test_raises_assertion_error_when_abort_field_missing(self):
128129
}
129130
with pytest.raises(AssertionError):
130131
_validate_ticket_response(response)
132+
133+
134+
class TestPrepareRegistration:
135+
def test_tab_ingest_is_set_correctly(self):
136+
files = [
137+
File(filepath="tests/fixtures/add_dir_files/somefile.txt"),
138+
File(
139+
filepath="tests/fixtures/add_dir_files/somefile.txt",
140+
tab_ingest=False, # type: ignore
141+
),
142+
File(
143+
filepath="tests/fixtures/add_dir_files/somefile.txt",
144+
restrict=True,
145+
),
146+
File(
147+
filepath="tests/fixtures/add_dir_files/somefile.txt",
148+
categories=["Test file"],
149+
),
150+
]
151+
registration = _prepare_registration(files, use_replace=False)
152+
expected_registration = [
153+
{
154+
"categories": ["DATA"],
155+
"mimeType": "application/octet-stream",
156+
"restrict": False,
157+
"tabIngest": True,
158+
},
159+
{
160+
"categories": ["DATA"],
161+
"mimeType": "application/octet-stream",
162+
"restrict": False,
163+
"tabIngest": False,
164+
},
165+
{
166+
"categories": ["DATA"],
167+
"mimeType": "application/octet-stream",
168+
"restrict": True,
169+
"tabIngest": True,
170+
},
171+
{
172+
"categories": ["Test file"],
173+
"mimeType": "application/octet-stream",
174+
"restrict": False,
175+
"tabIngest": True,
176+
},
177+
]
178+
assert registration == expected_registration

0 commit comments

Comments
 (0)