diff --git a/CLAUDE.md b/CLAUDE.md index ea98a7eef..0a47bd9a6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -36,6 +36,16 @@ pip install -e ".[docs]" && mkdocs serve ## Conventions +### Docstring examples format +Use mkdocstrings admonition syntax. Start with a description line, then wrap code in a fenced python block. Examples should include login boilerplate. + + Example: Short title +   + Description of what the example does. + ```python + + ``` + ### Async-first with generated sync wrappers All new methods must be async with `_async` suffix. The `@async_to_sync` class decorator (`core/async_utils.py`) auto-generates sync counterparts at class definition time. Never write sync methods manually on model classes — the decorator handles it. diff --git a/docs/reference/experimental/async/download_list.md b/docs/reference/experimental/async/download_list.md new file mode 100644 index 000000000..990ac3d2d --- /dev/null +++ b/docs/reference/experimental/async/download_list.md @@ -0,0 +1,30 @@ +# Download List + +The Synapse Download List (cart) lets you queue files for bulk download via the Synapse +web UI or API. Files are downloaded individually rather than packaged into a zip because +download lists can exceed 100 GB. Successfully downloaded files are removed from the cart +automatically, so interrupted runs are safely resumable. + +## API Reference + +[](){ #download-list-reference-async } + +::: synapseclient.operations.download_list_files_async + +::: synapseclient.operations.download_list_manifest_async + +::: synapseclient.operations.download_list_add_async + +::: synapseclient.operations.download_list_remove_async + +::: synapseclient.operations.download_list_clear_async + +--- + +[](){ #download-list-item-reference-async } +## DownloadListItem + +Identifies a specific file version in the download list. Used as input to +download_list_add_async and download_list_remove_async. 
+ +::: synapseclient.operations.DownloadListItem diff --git a/docs/reference/experimental/sync/download_list.md b/docs/reference/experimental/sync/download_list.md new file mode 100644 index 000000000..8e3a1829c --- /dev/null +++ b/docs/reference/experimental/sync/download_list.md @@ -0,0 +1,42 @@ +[](){ #download-list-reference-sync } +# Download List + +The Synapse Download List (cart) lets you queue files for bulk download via the Synapse +web UI or API. Files are downloaded individually rather than packaged into a zip because +download lists can exceed 100 GB. Successfully downloaded files are removed from the cart +automatically, so interrupted runs are safely resumable. + +## Example + +```python +from synapseclient import Synapse +from synapseclient.operations import download_list_files + +syn = Synapse() +syn.login() + +# Download all files in the cart to a local directory +manifest_path = download_list_files(download_location="./downloads") +``` + +## API Reference + +::: synapseclient.operations.download_list_files + +::: synapseclient.operations.download_list_manifest + +::: synapseclient.operations.download_list_add + +::: synapseclient.operations.download_list_remove + +::: synapseclient.operations.download_list_clear + +--- + +[](){ #download-list-item-reference-sync } +## DownloadListItem + +Identifies a specific file version in the download list. Used as input to +download_list_add and download_list_remove. 
+ +::: synapseclient.operations.DownloadListItem diff --git a/mkdocs.yml b/mkdocs.yml index ee7b796a7..cc6eb5ce4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -114,6 +114,7 @@ nav: - JSONSchema: reference/experimental/sync/json_schema.md - Wiki: reference/experimental/sync/wiki.md - FormGroup and Form: reference/experimental/sync/form.md + - Download List: reference/experimental/sync/download_list.md - Extensions: - Curator: reference/extensions/curator.md - Asynchronous: @@ -142,6 +143,7 @@ nav: - JSONSchema: reference/experimental/async/json_schema.md - Wiki: reference/experimental/async/wiki.md - FormGroup and Form: reference/experimental/async/form.md + - Download List: reference/experimental/async/download_list.md - Mixins: - AccessControllable: reference/experimental/mixins/access_controllable.md - StorableContainer: reference/experimental/mixins/storable_container.md diff --git a/synapseclient/__main__.py b/synapseclient/__main__.py index c86ecf75d..fa462bb4c 100644 --- a/synapseclient/__main__.py +++ b/synapseclient/__main__.py @@ -676,7 +676,12 @@ def submit(args, syn): def get_download_list(args, syn: synapseclient.Synapse) -> None: """Download files from the Synapse download cart""" - manifest_path = syn.get_download_list(downloadLocation=args.downloadLocation) + from synapseclient.operations import download_list_files + + manifest_path = download_list_files( + download_location=args.downloadLocation, + synapse_client=syn, + ) syn.logger.info(f"Manifest file: {manifest_path}") diff --git a/synapseclient/api/CLAUDE.md b/synapseclient/api/CLAUDE.md index 01c2d0ecd..9d267a2b2 100644 --- a/synapseclient/api/CLAUDE.md +++ b/synapseclient/api/CLAUDE.md @@ -4,6 +4,10 @@ REST API service layer — thin async functions that map to Synapse REST endpoints. One file per resource type. Called by model layer, never by end users directly. 
+## Reference + +- [Synapse REST API docs](https://rest-docs.synapse.org/rest/) + ## Conventions ### Function signature pattern diff --git a/synapseclient/api/__init__.py b/synapseclient/api/__init__.py index 2f9e454ea..247e16229 100644 --- a/synapseclient/api/__init__.py +++ b/synapseclient/api/__init__.py @@ -27,6 +27,11 @@ ) from .docker_commit_services import get_docker_tag from .docker_services import get_entity_id_by_repository_name +from .download_list_services import ( + add_to_download_list_async, + clear_download_list_async, + remove_from_download_list_async, +) from .entity_bundle_services_v2 import ( get_entity_id_bundle2, get_entity_id_version_bundle2, @@ -95,6 +100,7 @@ get_file_handle, get_file_handle_for_download, get_file_handle_for_download_async, + get_file_handle_presigned_url, post_external_filehandle, post_external_object_store_filehandle, post_external_s3_file_handle, @@ -196,6 +202,7 @@ "post_external_object_store_filehandle", "post_external_s3_file_handle", "get_file_handle", + "get_file_handle_presigned_url", "post_external_filehandle", "post_file_multipart_presigned_urls", "put_file_multipart_add", @@ -318,6 +325,10 @@ "list_curation_tasks", "list_grid_sessions", "update_curation_task", + # download_list_services + "add_to_download_list_async", + "clear_download_list_async", + "remove_from_download_list_async", # docker_commit_services "get_docker_tag", # docker_services diff --git a/synapseclient/api/download_list_services.py b/synapseclient/api/download_list_services.py new file mode 100644 index 000000000..003b8c973 --- /dev/null +++ b/synapseclient/api/download_list_services.py @@ -0,0 +1,93 @@ +"""This module is responsible for exposing the services defined at: + +""" + +import json +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from synapseclient import Synapse + from synapseclient.operations.download_list_operations import DownloadListItem + + +async def clear_download_list_async( + *, + synapse_client: 
Optional["Synapse"] = None, +) -> None: + """Clear all files from the user's Synapse download list. + + + + Arguments: + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + await client.rest_delete_async("/download/list") + + +async def add_to_download_list_async( + files: list["DownloadListItem"], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Add a batch of specific file versions to the user's Synapse download list. + + + + Arguments: + files: List of DownloadListItem objects identifying the file versions to add. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files added to the download list. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + batch = [ + {"fileEntityId": item.file_entity_id, "versionNumber": item.version_number} + for item in files + ] + request_body = {"batchToAdd": batch} + response = await client.rest_post_async( + "/download/list/add", body=json.dumps(request_body) + ) + return response["numberOfFilesAdded"] + + +async def remove_from_download_list_async( + files: list["DownloadListItem"], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Remove a batch of specific file versions from the user's Synapse download list. + + + + Arguments: + files: List of DownloadListItem objects identifying the file versions to remove. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. 
+ + Returns: + The number of files removed from the download list. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + batch = [ + {"fileEntityId": item.file_entity_id, "versionNumber": item.version_number} + for item in files + ] + request_body = {"batchToRemove": batch} + response = await client.rest_post_async( + "/download/list/remove", body=json.dumps(request_body) + ) + return response["numberOfFilesRemoved"] diff --git a/synapseclient/api/file_services.py b/synapseclient/api/file_services.py index a9894f27f..75e451dec 100644 --- a/synapseclient/api/file_services.py +++ b/synapseclient/api/file_services.py @@ -384,6 +384,45 @@ async def get_file_handle( ) +async def get_file_handle_presigned_url( + file_handle_id: str, + *, + synapse_client: Optional["Synapse"] = None, +) -> str: + """ + Get a pre-signed URL for a file handle owned by the current user. + Unlike get_file_handle_for_download_async, this does not require an + associated Synapse entity — only that the caller is the creator of the + file handle. + + + + Arguments: + file_handle_id: The ID of the file handle. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Raises: + SynapseFileNotFoundError: If the fileHandleId is not found in Synapse. + SynapseAuthorizationError: If the caller is not the creator of the + file handle. + + Returns: + A pre-signed URL string for downloading the file. The Synapse endpoint + returns the URL as text/plain when redirect=false, so rest_get_async + returns a plain string rather than a dict for this call. 
+ """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + + return await client.rest_get_async( + f"/fileHandle/{file_handle_id}/url?redirect=false", + endpoint=client.fileHandleEndpoint, + ) + + async def get_file_handle_for_download_async( file_handle_id: str, synapse_id: str, diff --git a/synapseclient/client.py b/synapseclient/client.py index 6a869dd2d..4d4785fcc 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -3147,15 +3147,68 @@ async def upload_file(): ############################################################ # Download List # ############################################################ - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_clear()` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def clear_download_list(self): - """Clear all files from download list""" + """Clear all files from download list + + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # syn.clear_download_list() + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_clear + + syn = Synapse() + syn.login() + + # Clear all files from the download list + download_list_clear() + ``` + """ self.restDELETE("/download/list") - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_remove(files)` instead. " + "See synapseclient.operations.download_list_operations for the new interface." 
+ ), + ) def remove_from_download_list(self, list_of_files: typing.List[typing.Dict]) -> int: """Remove a batch of files from download list + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # syn.remove_from_download_list([ + # {"fileEntityId": "syn123", "versionNumber": 1}, + # ]) + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_remove, DownloadListItem + + syn = Synapse() + syn.login() + + # Remove specific file versions from the download list + download_list_remove([ + DownloadListItem(file_entity_id="syn123", version_number=1), + ]) + ``` + Arguments: list_of_files: Array of files in the format of a mapping {fileEntityId: synid, versionNumber: version} @@ -3168,7 +3221,13 @@ def remove_from_download_list(self, list_of_files: typing.List[typing.Dict]) -> ) return num_files_removed - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_manifest()` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def _generate_manifest_from_download_list( self, quoteCharacter: str = '"', @@ -3177,8 +3236,25 @@ def _generate_manifest_from_download_list( separator: str = ",", header: bool = True, ): - """ - Creates a download list manifest generation request + """Creates a download list manifest generation request + + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # manifest_handle = syn._generate_manifest_from_download_list() + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_manifest + + syn = Synapse() + syn.login() + + # Generate and download the manifest CSV + manifest_path = download_list_manifest() + ``` Arguments: quoteCharacter: The character to be used for quoted elements in the resulting file. 
@@ -3204,10 +3280,34 @@ def _generate_manifest_from_download_list( uri="/download/list/manifest/async", request=request_body ) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_manifest()` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def get_download_list_manifest(self): """Get the path of the download list manifest file + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # manifest_path = syn.get_download_list_manifest() + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_manifest + + syn = Synapse() + syn.login() + + # Generate and download the manifest CSV + manifest_path = download_list_manifest() + ``` + Returns: Path of download list manifest file """ @@ -3231,10 +3331,34 @@ def get_download_list_manifest(self): ) return downloaded_path - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_files(download_location=...)` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def get_download_list(self, downloadLocation: str = None) -> str: """Download all files from your Synapse download list + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # manifest_path = syn.get_download_list(downloadLocation="./downloads") + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_files + + syn = Synapse() + syn.login() + + # Download all files in the cart and get the result manifest path + manifest_path = download_list_files(download_location="./downloads") + ``` + Arguments: downloadLocation: Directory to download files to. 
diff --git a/synapseclient/core/constants/concrete_types.py b/synapseclient/core/constants/concrete_types.py index fba11dbdb..08736f2e3 100644 --- a/synapseclient/core/constants/concrete_types.py +++ b/synapseclient/core/constants/concrete_types.py @@ -117,6 +117,11 @@ "org.sagebionetworks.repo.model.curation.metadata.RecordBasedMetadataTaskProperties" ) +# Download List +DOWNLOAD_LIST_MANIFEST_REQUEST = ( + "org.sagebionetworks.repo.model.download.DownloadListManifestRequest" +) + # Grid Session Types CREATE_GRID_REQUEST = "org.sagebionetworks.repo.model.grid.CreateGridRequest" GRID_RECORD_SET_EXPORT_REQUEST = ( diff --git a/synapseclient/models/mixins/asynchronous_job.py b/synapseclient/models/mixins/asynchronous_job.py index fd3649bc1..407babe92 100644 --- a/synapseclient/models/mixins/asynchronous_job.py +++ b/synapseclient/models/mixins/asynchronous_job.py @@ -14,6 +14,7 @@ AGENT_CHAT_REQUEST, CREATE_GRID_REQUEST, CREATE_SCHEMA_REQUEST, + DOWNLOAD_LIST_MANIFEST_REQUEST, GET_VALIDATION_SCHEMA_REQUEST, GRID_RECORD_SET_EXPORT_REQUEST, QUERY_BUNDLE_REQUEST, @@ -29,6 +30,7 @@ ASYNC_JOB_URIS = { AGENT_CHAT_REQUEST: "/agent/chat/async", CREATE_GRID_REQUEST: "/grid/session/async", + DOWNLOAD_LIST_MANIFEST_REQUEST: "/download/list/manifest/async", GRID_RECORD_SET_EXPORT_REQUEST: "/grid/export/recordset/async", TABLE_UPDATE_TRANSACTION_REQUEST: "/entity/{entityId}/table/transaction/async", GET_VALIDATION_SCHEMA_REQUEST: "/schema/type/validation/async", diff --git a/synapseclient/operations/__init__.py b/synapseclient/operations/__init__.py index ab112f612..5134b03a4 100644 --- a/synapseclient/operations/__init__.py +++ b/synapseclient/operations/__init__.py @@ -1,4 +1,17 @@ from synapseclient.operations.delete_operations import delete, delete_async +from synapseclient.operations.download_list_operations import ( + DownloadListItem, + download_list_add, + download_list_add_async, + download_list_clear, + download_list_clear_async, + download_list_files, + 
download_list_files_async, + download_list_manifest, + download_list_manifest_async, + download_list_remove, + download_list_remove_async, +) from synapseclient.operations.factory_operations import ( ActivityOptions, FileOptions, @@ -47,6 +60,18 @@ # Delete operations "delete", "delete_async", + # Download list operations + "DownloadListItem", + "download_list_files", + "download_list_files_async", + "download_list_manifest", + "download_list_manifest_async", + "download_list_add", + "download_list_add_async", + "download_list_remove", + "download_list_remove_async", + "download_list_clear", + "download_list_clear_async", # Utility operations "find_entity_id", "find_entity_id_async", diff --git a/synapseclient/operations/download_list_operations.py b/synapseclient/operations/download_list_operations.py new file mode 100644 index 000000000..f2b20a3a4 --- /dev/null +++ b/synapseclient/operations/download_list_operations.py @@ -0,0 +1,943 @@ +"""Operations for the user's Synapse Download List (cart). + +The download list is a user-scoped cart of files queued for bulk download. +Files can be added via the Synapse web UI or API and downloaded in batch. + +Files are not packaged into a zip because download lists can exceed 100 GB. +Instead, files are downloaded individually and removed from the list after +successful download, so interrupted runs are safely resumable. 
+""" + +import asyncio +import csv +import os +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Optional + +from synapseclient.api.download_list_services import ( + add_to_download_list_async, + clear_download_list_async, + remove_from_download_list_async, +) +from synapseclient.core.async_utils import wrap_async_to_sync +from synapseclient.core.constants.concrete_types import DOWNLOAD_LIST_MANIFEST_REQUEST +from synapseclient.core.exceptions import SynapseError +from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator +from synapseclient.models.table_components import CsvTableDescriptor + +if TYPE_CHECKING: + from synapseclient import Synapse + +_ID_COLUMN = "ID" +_VERSION_COLUMN = "versionNumber" +_PATH_COLUMN = "path" +_ERROR_COLUMN = "error" + + +@dataclass +class DownloadListItem: + """A single item for a user's download list. + + + + Attributes: + file_entity_id: Synapse ID of the file entity (e.g. "syn123"). + version_number: Version of the file to target. Defaults to None, + meaning no version is specified: an item added this way + downloads the latest version, and it can only be removed from + the list by an item that also has no version. + """ + + file_entity_id: str + """Synapse ID of the file entity (e.g. "syn123").""" + + version_number: Optional[int] = None + """Version of the file to target.""" + + +@dataclass +class _DownloadListManifestRequest(AsynchronousCommunicator): + """Drives the full lifecycle of a Synapse async manifest job in one object. + + Calling send_job_and_wait_async() executes four phases automatically: + + **Phase 1 — Submit** (to_synapse_request) + Builds the POST body and submits it to + POST /download/list/manifest/async/start. Synapse starts a + background job and returns a token. + + **Phase 2 — Poll** (AsynchronousCommunicator base class) + Polls GET /download/list/manifest/async/get/{token} until the job + state is COMPLETE (or the timeout is reached). No code needed here + — the base class handles this using the endpoint registered in + ASYNC_JOB_URIS for this class's concrete_type. 
+ + **Phase 3 — Parse response** (fill_from_dict) + Extracts resultFileHandleId from the completed job response and + stores it in self.result_file_handle_id. + + **Phase 4 — Download** (_post_exchange_async) + Retrieves file handle metadata via get_file_handle() and a + pre-signed S3 URL via get_file_handle_presigned_url(), then + streams the CSV to disk via download_from_url() (run in a thread + pool via asyncio.to_thread since it is a blocking sync method). + Stores the local path in self.manifest_path. + + After send_job_and_wait_async() returns, manifest_path holds the + local path to the downloaded CSV and is ready to use. + + + + """ + + concrete_type: str = field( + init=False, + default=DOWNLOAD_LIST_MANIFEST_REQUEST, + ) + """The concreteType string sent in the request body. Set automatically; + registered in ASYNC_JOB_URIS to resolve the REST endpoint.""" + + result_file_handle_id: Optional[str] = field(init=False, default=None) + """File handle ID of the generated manifest CSV. Populated by + fill_from_dict() from the resultFileHandleId field of the job + response. None until the job completes.""" + + manifest_path: Optional[str] = field(init=False, default=None) + """Absolute local path of the downloaded manifest CSV. Populated by + _post_exchange_async() after the file is downloaded. None until + send_job_and_wait_async() returns.""" + + csv_table_descriptor: CsvTableDescriptor = field( + default_factory=CsvTableDescriptor, + ) + """Describes the format of the generated CSV manifest.""" + + def to_synapse_request(self) -> dict[str, Any]: + """Build the request body for the manifest async job. + + Constructs the POST body for + POST /download/list/manifest/async/start including the concrete type + and CSV descriptor. + + Returns: + A dictionary containing the request body expected by the Synapse + REST API. 
+ """ + return { + "concreteType": self.concrete_type, + "csvTableDescriptor": self.csv_table_descriptor.to_synapse_request(), + } + + def fill_from_dict( + self, synapse_response: dict[str, Any] + ) -> "_DownloadListManifestRequest": + """Converts the data coming from the Synapse async job response into + this data class. + + Extracts the resultFileHandleId from the completed job response and + stores it in result_file_handle_id. + + Arguments: + synapse_response: The response dict from the completed Synapse + async manifest job. + + Returns: + The _DownloadListManifestRequest object instance. + """ + self.result_file_handle_id = synapse_response.get("resultFileHandleId") + return self + + async def _post_exchange_async( + self, synapse_client: Optional["Synapse"] = None, **kwargs + ) -> None: + """Download the manifest CSV from Synapse after the async job completes. + + Retrieves the file handle metadata and a pre-signed S3 URL using + creator-based endpoints (no entity association required), then + streams the CSV to disk using download_from_url (run in a thread + pool to avoid blocking the event loop). On success, sets + self.manifest_path to the local path of the downloaded file. + + Arguments: + synapse_client: The Synapse client to use for the request. Uses + the cached singleton if omitted. + **kwargs: Additional arguments. Supports destination (str) to + control the download directory; defaults to the current + working directory. 
+ """ + from synapseclient import Synapse + from synapseclient.api.file_services import ( + get_file_handle, + get_file_handle_presigned_url, + ) + from synapseclient.core.download.download_functions import download_from_url + + destination = kwargs.get("destination", ".") + client = Synapse.get_client(synapse_client=synapse_client) + file_handle = await get_file_handle( + file_handle_id=self.result_file_handle_id, + synapse_client=client, + ) + presigned_url = await get_file_handle_presigned_url( + file_handle_id=self.result_file_handle_id, + synapse_client=client, + ) + self.manifest_path = await asyncio.to_thread( + download_from_url, + url=presigned_url, + destination=destination, + file_handle_id=file_handle["id"], + expected_md5=file_handle.get("contentMd5"), + url_is_presigned=True, + synapse_client=client, + ) + + +def download_list_files( + download_location: Optional[str] = None, + *, + parallel: bool = False, + max_concurrent: int = 10, + synapse_client: Optional["Synapse"] = None, +) -> str: + """Download all files in the Synapse download list (cart) to a local directory. + + Files are downloaded individually. The cart is not packaged into a zip because + download lists can exceed 100 GB. Only successfully downloaded files are removed + from the cart after the full pass completes, so interrupted runs are safely + resumable. + + Files that cannot be accessed or fail to download are left in the cart and + recorded with an error value in the result manifest. + + Arguments: + download_location: Directory to download files to. Defaults to the + current working directory. + parallel: If True, files are downloaded concurrently up to + max_concurrent at a time using asyncio.gather. If False + (default), files are downloaded sequentially. + max_concurrent: Maximum number of files to download concurrently when + parallel=True. Defaults to 10. Has no effect when + parallel=False. 
+ synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + Path to the result manifest CSV, which contains all original manifest + columns plus path (local file path) and error (error message or + empty string) columns. + + Raises: + SynapseHTTPError: If the manifest async job fails or the cart is empty + ("No files available for download"). + SynapseError: If the manifest job completes but produces no local file, + or if the downloaded CSV has no headers or contains reserved column + names ("path" or "error"). + + Example: Download all files in the cart +   + Download all files in the user's download list to a local directory. + ```python + from synapseclient import Synapse + from synapseclient.operations import download_list_files + + syn = Synapse() + syn.login() + + manifest_path = download_list_files(download_location="./data") + ``` + """ + return wrap_async_to_sync( + coroutine=download_list_files_async( + download_location=download_location, + parallel=parallel, + max_concurrent=max_concurrent, + synapse_client=synapse_client, + ) + ) + + +async def download_list_files_async( + download_location: Optional[str] = None, + *, + parallel: bool = False, + max_concurrent: int = 10, + synapse_client: Optional["Synapse"] = None, +) -> str: + """Download all files in the Synapse download list (cart) to a local directory. + + Files are downloaded individually. The cart is not packaged into a zip because + download lists can exceed 100 GB. Only successfully downloaded files are removed + from the cart after the full pass completes, so interrupted runs are safely + resumable. + + Files that cannot be accessed or fail to download are left in the cart and + recorded with an error value in the result manifest. + + Arguments: + download_location: Directory to download files to. Defaults to the + current working directory. 
+ parallel: If True, files are downloaded concurrently up to + max_concurrent at a time using asyncio.gather. If False + (default), files are downloaded sequentially. + max_concurrent: Maximum number of files to download concurrently when + parallel=True. Defaults to 10. Has no effect when + parallel=False. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + Path to the result manifest CSV, which contains all original manifest + columns plus path (local file path) and error (error message or + empty string) columns. + + Raises: + SynapseHTTPError: If the manifest async job fails or the cart is empty + ("No files available for download"). + SynapseError: If the manifest job completes but produces no local file, + or if the downloaded CSV has no headers or contains reserved column + names ("path" or "error"). + + Example: Download all files in the cart +   + Download all files in the user's download list to a local directory. + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.operations import download_list_files_async + + async def main(): + syn = Synapse() + syn.login() + + manifest_path = await download_list_files_async(download_location="./data") + + asyncio.run(main()) + ``` + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + + if download_location is not None: + download_location = os.path.expandvars(os.path.expanduser(download_location)) + + # 1. Fetch the server-generated manifest and read it into memory + manifest_path = await download_list_manifest_async(synapse_client=client) + try: + columns, rows = await asyncio.to_thread(_read_manifest_rows, manifest_path) + finally: + os.remove(manifest_path) + + # 2. Validate manifest columns and append result columns + columns = _validate_and_extend_columns(columns) + + # 3. 
Download each file in the manifest + downloaded_files = await _download_all_manifest_files( + rows=rows, + download_location=download_location, + parallel=parallel, + max_concurrent=max_concurrent, + synapse_client=client, + ) + + # 4. Write the result manifest with path/error columns + new_manifest_path = await _save_result_manifest( + rows=rows, + columns=columns, + download_location=download_location, + ) + + # 5. Remove successfully downloaded files from the cart. The Synapse API + # requires the (fileEntityId, versionNumber) pair at removal to match + # exactly what was used at add time -- a no-version add is only matched + # by a no-version remove. The manifest always carries a resolved + # versionNumber, so a versioned remove silently fails (returns 0) for + # entries that were added without a version. When that happens, retry + # the same entity with version_number=None to match the no-version add. + if downloaded_files: + for item in downloaded_files: + removed = await remove_from_download_list_async( + files=[item], + synapse_client=client, + ) + if removed == 0: + await remove_from_download_list_async( + files=[DownloadListItem(file_entity_id=item.file_entity_id)], + synapse_client=client, + ) + else: + client.logger.warning("A manifest was created, but no files were downloaded") + + return new_manifest_path + + +def download_list_manifest( + *, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + destination: str = ".", + synapse_client: Optional["Synapse"] = None, +) -> str: + """Generate and download the manifest CSV for the current cart contents. + + Submits an async job to Synapse to generate the manifest, then downloads + the resulting CSV. The manifest contains the same columns as the zip + manifest downloaded from the Synapse web UI. 
async def download_list_manifest_async(
    *,
    csv_table_descriptor: Optional[CsvTableDescriptor] = None,
    destination: str = ".",
    synapse_client: Optional["Synapse"] = None,
) -> str:
    """Generate and download the manifest CSV for the current cart contents.

    Submits an async job to Synapse to generate the manifest, then downloads
    the resulting CSV. The manifest contains the same columns as the zip
    manifest downloaded from the Synapse web UI.

    Arguments:
        csv_table_descriptor: Optional CsvTableDescriptor controlling the
            format of the generated CSV (separator, quote character, escape
            character, line ending, and whether the first line is a header).
            When omitted a default-constructed CsvTableDescriptor is sent.
        destination: Directory to download the manifest CSV to. Defaults to
            the current working directory. A leading "~" and environment
            variables are expanded before use.
        synapse_client: If not passed in and caching was not disabled by
            Synapse.allow_client_caching(False) this will use the last created
            instance from the Synapse class constructor.

    Raises:
        SynapseError: If the async job completes without producing a manifest.

    Returns:
        Path to the downloaded manifest CSV.

    Example: Get the download list manifest
        &nbsp;
        Inspect the cart contents before downloading.
        ```python
        import asyncio
        from synapseclient import Synapse
        from synapseclient.operations import download_list_manifest_async

        async def main():
            syn = Synapse()
            syn.login()

            manifest_path = await download_list_manifest_async()

        asyncio.run(main())
        ```
    """
    # Resolve "~" and $VARS here so this entry point treats paths the same way
    # download_list_files_async treats its download_location argument.
    # Both expansions are idempotent, so an already-expanded path is unchanged.
    destination = os.path.expandvars(os.path.expanduser(destination))

    manifest_request = _DownloadListManifestRequest(
        csv_table_descriptor=csv_table_descriptor or CsvTableDescriptor(),
    )
    await manifest_request.send_job_and_wait_async(
        post_exchange_args={"destination": destination},
        synapse_client=synapse_client,
    )
    # The job can report success without a local file having been written;
    # surface that explicitly instead of returning None to the caller.
    if manifest_request.manifest_path is None:
        raise SynapseError(
            "Manifest job completed but no local file was produced. "
            "The download from Synapse may have failed silently."
        )
    return manifest_request.manifest_path
async def download_list_add_async(
    files: list[DownloadListItem],
    *,
    synapse_client: Optional["Synapse"] = None,
) -> int:
    """Queue file versions in the Synapse download list (cart).

    An item that carries no version number is added without a version; the
    latest version of that file is what gets downloaded.

    Arguments:
        files: DownloadListItem objects naming the file versions to add.
        synapse_client: If not passed in and caching was not disabled by
            Synapse.allow_client_caching(False) this will use the last created
            instance from the Synapse class constructor.

    Returns:
        The number of files added.

    Example: Add files to the download list
        &nbsp;
        Add specific file versions to the cart.
        ```python
        import asyncio
        from synapseclient import Synapse
        from synapseclient.operations import download_list_add_async, DownloadListItem

        async def main():
            syn = Synapse()
            syn.login()

            count = await download_list_add_async([
                DownloadListItem(file_entity_id="syn123", version_number=1),
                DownloadListItem(file_entity_id="syn456", version_number=2),
            ])

        asyncio.run(main())
        ```
    """
    # Pure delegation: the API-level helper performs the request and reports
    # how many entries were added.
    added_count = await add_to_download_list_async(
        files=files,
        synapse_client=synapse_client,
    )
    return added_count
async def download_list_remove_async(
    files: list[DownloadListItem],
    *,
    synapse_client: Optional["Synapse"] = None,
) -> int:
    """Take file versions out of the Synapse download list (cart).

    Removal matches on the exact way an entry was added: an entry added with
    a version must be removed with that same version, and an entry added
    without a version must be removed without one.

    Arguments:
        files: DownloadListItem objects naming the file versions to remove.
        synapse_client: If not passed in and caching was not disabled by
            Synapse.allow_client_caching(False) this will use the last created
            instance from the Synapse class constructor.

    Returns:
        The number of files removed.

    Example: Remove files from the download list
        &nbsp;
        Remove specific file versions from the cart.
        ```python
        import asyncio
        from synapseclient import Synapse
        from synapseclient.operations import download_list_remove_async, DownloadListItem

        async def main():
            syn = Synapse()
            syn.login()

            count = await download_list_remove_async([
                DownloadListItem(file_entity_id="syn123", version_number=1),
            ])

        asyncio.run(main())
        ```
    """
    # Pure delegation: the API-level helper performs the request and reports
    # how many entries were actually removed.
    removed_count = await remove_from_download_list_async(
        files=files,
        synapse_client=synapse_client,
    )
    return removed_count
def _read_manifest_rows(
    path: str,
) -> tuple[Optional[list[str]], list[dict[str, Any]]]:
    """Read the server-generated manifest CSV into memory.

    The file is decoded as UTF-8 regardless of the platform locale, and a
    leading byte-order mark (if any) is dropped so it cannot become part of
    the first column name.

    Arguments:
        path: Local path to the server-generated manifest CSV.

    Returns:
        (columns, rows) where columns is the list of field names and
        rows is a list of row dicts (possibly empty). Returns
        (None, []) if the CSV file has no column headers.
    """
    # Without an explicit encoding, open() falls back to the locale's
    # preferred encoding, which is not UTF-8 on every platform. "utf-8-sig"
    # reads plain UTF-8 and additionally strips a BOM when one is present.
    with open(path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        columns = reader.fieldnames
        # fieldnames is None (or empty) when the file holds no header row.
        if not columns:
            return None, []
        return list(columns), list(reader)
" + "This is unexpected and may indicate a malformed manifest from the server, " + "or Synapse has added these columns." + ) + + return list(columns) + [_PATH_COLUMN, _ERROR_COLUMN] + + +async def _download_all_manifest_files( + rows: list[dict[str, Any]], + download_location: Optional[str], + parallel: bool = False, + max_concurrent: int = 10, + *, + synapse_client: Optional["Synapse"] = None, +) -> list[DownloadListItem]: + """Download all files from the manifest, either sequentially or concurrently. + + Arguments: + rows: List of row dicts from the manifest. Each row is mutated in + place by _download_manifest_file to include "path" and + "error" values. + download_location: Directory to download files to. + parallel: If True, rows are downloaded concurrently (bounded by + max_concurrent) via asyncio.gather. If False, rows are + downloaded one at a time. + max_concurrent: Maximum number of concurrent downloads when + parallel=True. Defaults to 10. Must be at least 1. Has no + effect when parallel=False. + synapse_client: Optional Synapse client. + + Raises: + ValueError: If max_concurrent is less than 1. + + Returns: + List of DownloadListItem for each successfully downloaded file. + """ + if max_concurrent < 1: + raise ValueError(f"max_concurrent must be at least 1, got {max_concurrent}.") + if parallel: + # asyncio.gather schedules all coroutines immediately, so without a + # semaphore a large cart would fire hundreds of concurrent HTTP requests + # at once — risking rate-limiting from Synapse and exhausting local + # file-descriptor / memory limits. The semaphore lets all coroutines + # be created (preserving gather's result ordering) while ensuring that + # at most max_concurrent are actually running at any given time. 
async def _download_manifest_file(
    row: dict[str, Any],
    download_location: Optional[str] = None,
    *,
    synapse_client: Optional["Synapse"] = None,
) -> Optional[DownloadListItem]:
    """Download the file described by a manifest row and record the result in place.

    On success, sets row["path"] to the local file path and row["error"]
    to "". On failure, sets row["path"] to "" and row["error"] to the
    error message. Failures are logged but never raised, so one bad file
    does not abort the entire batch. This includes rows that are themselves
    malformed: a missing or blank "ID", or a "versionNumber" that is not an
    integer, is recorded as that row's error.

    Arguments:
        row: A manifest row dict. Expected to contain an "ID" key;
            "versionNumber" is optional and defaults to the latest version
            when missing or blank. Modified in place to add "path" and
            "error" entries.
        download_location: Directory to download the file to, passed to
            File as its path. May be None.
        synapse_client: Optional Synapse client, resolved through
            Synapse.get_client.

    Returns:
        A DownloadListItem on success, or None on failure.
    """
    from synapseclient import Synapse
    from synapseclient.models.file import File

    client = Synapse.get_client(synapse_client=synapse_client)
    entity_id = row.get(_ID_COLUMN)
    version_str = row.get(_VERSION_COLUMN)
    version_number: Optional[int] = None

    try:
        # Row validation lives inside the try so that a malformed row is
        # reported through the row's error column like any other failure,
        # rather than propagating and cancelling the remaining downloads.
        if not entity_id:
            raise ValueError(
                f"Manifest row has no value in the {_ID_COLUMN!r} column"
            )
        version_number = int(version_str) if version_str else None

        file = await File(
            id=entity_id,
            version_number=version_number,
            path=download_location,
        ).get_async(synapse_client=client)
    except Exception as e:
        row[_PATH_COLUMN] = ""
        row[_ERROR_COLUMN] = str(e)
        # When the version text could not be parsed, show the raw text so the
        # log identifies the offending value; otherwise show the parsed number.
        shown_version = (
            version_str if version_number is None and version_str else version_number
        )
        client.logger.exception(f"Unable to download {entity_id} v{shown_version}")
        return None

    row[_PATH_COLUMN] = file.path or ""
    row[_ERROR_COLUMN] = ""
    return DownloadListItem(
        file_entity_id=entity_id,
        version_number=version_number,
    )
def _write_result_manifest(
    path: str,
    columns: list[str],
    rows: list[dict[str, Any]],
) -> None:
    """Write the annotated result rows to the output manifest CSV.

    Intended to be called via asyncio.to_thread to avoid blocking the
    event loop on synchronous file I/O. The file is always written as UTF-8
    so that non-ASCII file paths and error messages cannot fail to encode
    under a narrower platform locale.

    Arguments:
        path: Destination path for the output manifest CSV.
        columns: Field names for the CSV header, including "path" and
            "error".
        rows: List of row dicts, each mutated by _download_manifest_file to
            include "path" and "error" values. Keys that are not listed in
            columns are dropped from the output.
    """
    # An explicit encoding avoids the locale-dependent default of open().
    with open(path, "w", newline="", encoding="utf-8") as f:
        # extrasaction="ignore": rows may carry keys beyond `columns`; skip
        # them instead of raising ValueError.
        writer = csv.DictWriter(f, fieldnames=columns, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(rows)
### Integration tests (`tests/integration/`) - All async tests share one event loop: `asyncio_default_fixture_loop_scope = session` diff --git a/tests/integration/synapseclient/operations/async/test_download_list_operations_async.py b/tests/integration/synapseclient/operations/async/test_download_list_operations_async.py new file mode 100644 index 000000000..9395b4e4a --- /dev/null +++ b/tests/integration/synapseclient/operations/async/test_download_list_operations_async.py @@ -0,0 +1,610 @@ +"""Integration tests for download_list operation functions. + +The Synapse download list is a user-scoped resource: every test run against +the same Synapse account shares one cart. To coexist with other tests and +concurrent CI runs, these tests track the items they add and remove only +those items on teardown, instead of calling download_list_clear_async() as a global +reset. Assertions reason only about the test's own file ids, never about +the cart being globally empty. +""" + +import csv +import os +import tempfile +import uuid +from typing import Callable + +import pytest +import pytest_asyncio + +import synapseclient.core.utils as utils +from synapseclient import Project, Synapse +from synapseclient.core.exceptions import SynapseHTTPError +from synapseclient.models import File +from synapseclient.models.table_components import CsvTableDescriptor +from synapseclient.operations import ( + DownloadListItem, + download_list_add_async, + download_list_files_async, + download_list_manifest_async, + download_list_remove_async, +) + + +@pytest_asyncio.fixture +async def scheduled_for_cart_removal(syn: Synapse): + """Track items a test adds to the cart and remove only those items on teardown.""" + scheduled: list[DownloadListItem] = [] + yield scheduled + if scheduled: + try: + await download_list_remove_async(files=scheduled, synapse_client=syn) + except Exception as e: + pytest.fail( + f"Cart teardown failed — {len(scheduled)} item(s) may remain in " + f"the cart and affect 
async def _cart_entries(
    syn: Synapse,
    schedule_for_cleanup: Callable[..., None],
) -> set[tuple[str, int]]:
    """Return all (file_id, version_number) pairs currently in the user's cart.

    Returns an empty set when the cart is empty. Synapse returns HTTP 400 with
    the message 'No files available for download' in that case rather than
    producing an empty CSV. If this string changes server-side, update it here
    and in download_list_files_async's documented 'Raises' section.
    See POST /download/list/manifest/async/start in the Synapse REST docs
    (DownloadListController).

    Arguments:
        syn: Logged-in Synapse client used to request the manifest.
        schedule_for_cleanup: Callback that registers the downloaded manifest
            file for deletion after the test.

    Returns:
        Set of (file entity id, version number) tuples, one per manifest row.
    """
    try:
        manifest_path = await download_list_manifest_async(synapse_client=syn)
    except SynapseHTTPError as e:
        # Only the "empty cart" response is an expected outcome; any other
        # HTTP error is a real failure and must surface in the test.
        if "No files available for download" in str(e):
            return set()
        raise
    schedule_for_cleanup(manifest_path)
    # Decode explicitly instead of relying on the platform locale; utf-8-sig
    # also drops a BOM so it cannot end up glued to the "ID" header name.
    with open(manifest_path, newline="", encoding="utf-8-sig") as f:
        return {(row["ID"], int(row["versionNumber"])) for row in csv.DictReader(f)}
class TestDownloadListRemoveAsync:
    """Integration tests for download_list_remove_async.

    Every test filters the cart contents down to the file ids it created
    itself, because the cart is shared by all tests running against the same
    Synapse account (see the module docstring).

    - test_download_list_remove_removes_only_specified_files: selective version removal
    - test_download_list_remove_wrong_version_leaves_file_in_cart: wrong version is a no-op
    - test_download_list_remove_no_version_leaves_file_in_cart: omitted version does not match explicit version
    - test_download_list_remove_no_version_matches_no_version_entry: omitted version removes no-version entry
    """

    async def test_download_list_remove_removes_only_specified_files(
        self,
        project: Project,
        syn: Synapse,
        schedule_for_cleanup: Callable[..., None],
        scheduled_for_cart_removal: list[DownloadListItem],
    ) -> None:
        """download_list_remove_async() removes only the specified file versions, not others."""
        # GIVEN two files, each with two versions
        file_a = await _create_test_file(project, syn, schedule_for_cleanup)
        file_a_v1 = file_a.version_number
        file_a_v2 = await _upload_new_version(file_a, syn, schedule_for_cleanup)

        file_b = await _create_test_file(project, syn, schedule_for_cleanup)
        file_b_v1 = file_b.version_number
        file_b_v2 = await _upload_new_version(file_b, syn, schedule_for_cleanup)

        # AND all four versions are added to the cart
        added = [
            DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v1),
            DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v2),
            DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v1),
            DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v2),
        ]
        await download_list_add_async(files=added, synapse_client=syn)
        # Register all four for teardown so nothing this test added is left
        # behind if an assertion below fails.
        scheduled_for_cart_removal.extend(added)

        # WHEN I remove file_a v1 and file_b v2
        removed = await download_list_remove_async(
            files=[
                DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v1),
                DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v2),
            ],
            synapse_client=syn,
        )
        # Restrict the comparison to this test's files; the shared cart may
        # hold entries that belong to other tests.
        our_ids = {file_a.id, file_b.id}
        cart_entries = {
            e for e in await _cart_entries(syn, schedule_for_cleanup) if e[0] in our_ids
        }

        # THEN exactly 2 items were removed
        assert removed == 2, f"Expected 2 files removed, got {removed}"

        # AND the manifest (filtered to our file ids) contains only file_a v2 and file_b v1
        assert cart_entries == {
            (file_a.id, file_a_v2),
            (file_b.id, file_b_v1),
        }, f"Unexpected cart contents for test files: {cart_entries}"

    async def test_download_list_remove_wrong_version_leaves_file_in_cart(
        self,
        project: Project,
        syn: Synapse,
        schedule_for_cleanup: Callable[..., None],
        scheduled_for_cart_removal: list[DownloadListItem],
    ) -> None:
        """download_list_remove_async() with a wrong version is a no-op -- the file stays in the cart."""
        # GIVEN a cart entry for a file (added with an explicit version)
        file = await _create_test_file(project, syn, schedule_for_cleanup)
        await _add_to_cart(file, syn, scheduled_for_cart_removal)

        # WHEN I try to remove the file with a wrong version number
        removed = await download_list_remove_async(
            files=[
                DownloadListItem(
                    file_entity_id=file.id,
                    # Offset the stored version by 99 so the requested version
                    # differs from the one _add_to_cart put in the cart; the
                    # "or 1" only guards against version_number being None.
                    version_number=(file.version_number or 1) + 99,
                )
            ],
            synapse_client=syn,
        )

        # THEN no files are removed and the file remains in the cart
        assert removed == 0, f"Expected 0 files removed, got {removed}"
        cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)}
        assert file.id in cart_ids, f"Expected {file.id} to remain in the cart"

    async def test_download_list_remove_no_version_leaves_file_in_cart(
        self,
        project: Project,
        syn: Synapse,
        schedule_for_cleanup: Callable[..., None],
        scheduled_for_cart_removal: list[DownloadListItem],
    ) -> None:
        """download_list_remove_async() with no version does not match a cart entry that was
        added with an explicit version -- the API requires an exact
        (fileEntityId, versionNumber) pair."""
        # GIVEN a cart entry for a file (added with an explicit version)
        file = await _create_test_file(project, syn, schedule_for_cleanup)
        await _add_to_cart(file, syn, scheduled_for_cart_removal)

        # WHEN I try to remove the file without specifying a version
        removed = await download_list_remove_async(
            files=[DownloadListItem(file_entity_id=file.id)],
            synapse_client=syn,
        )

        # THEN no files are removed and the file remains in the cart
        assert removed == 0, f"Expected 0 files removed, got {removed}"
        cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)}
        assert file.id in cart_ids, f"Expected {file.id} to remain in the cart"

    async def test_download_list_remove_no_version_matches_no_version_entry(
        self,
        project: Project,
        syn: Synapse,
        schedule_for_cleanup: Callable[..., None],
        scheduled_for_cart_removal: list[DownloadListItem],
    ) -> None:
        """download_list_remove_async() with no version removes a cart entry that was also
        added without a version."""
        # GIVEN a cart entry for a file added without a version number
        file = await _create_test_file(project, syn, schedule_for_cleanup)
        item_no_version = DownloadListItem(file_entity_id=file.id)
        await download_list_add_async(files=[item_no_version], synapse_client=syn)
        # Still registered for teardown in case the removal under test fails
        # and the entry is left in the cart.
        scheduled_for_cart_removal.append(item_no_version)

        # WHEN I remove the file without specifying a version
        removed = await download_list_remove_async(
            files=[DownloadListItem(file_entity_id=file.id)],
            synapse_client=syn,
        )

        # THEN the file is reported as removed and no longer appears in the cart
        assert removed == 1, f"Expected 1 file removed, got {removed}"
        cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)}
        assert file.id not in cart_ids, f"Expected {file.id} to be absent from the cart"
+ + - test_download_list_files_downloads_and_removes_from_cart: sequential and parallel download + - test_download_list_files_multiple_versions_of_same_file: two versions both download + - test_download_list_files_default_location: omitting download_location writes to CWD + - test_download_list_files_no_version_add_is_removed_from_cart: + no-version add is downloaded and removed from the cart + """ + + @pytest.mark.parametrize("parallel", [False, True]) + async def test_download_list_files_downloads_and_removes_from_cart( + self, + parallel: bool, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """Downloaded files are present in the manifest and removed from cart.""" + # GIVEN two files added to the cart + file_a = await _create_test_file(project, syn, schedule_for_cleanup) + file_b = await _create_test_file(project, syn, schedule_for_cleanup) + await _add_to_cart(file_a, syn, scheduled_for_cart_removal) + await _add_to_cart(file_b, syn, scheduled_for_cart_removal) + + # WHEN I download the files + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = await download_list_files_async( + download_location=tmpdir, + parallel=parallel, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the manifest contains both files with valid paths and no errors + assert os.path.exists(manifest_path) + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + + my_rows = [r for r in rows if r["ID"] in {file_a.id, file_b.id}] + ids_in_manifest = {row["ID"] for row in my_rows} + assert file_a.id in ids_in_manifest + assert file_b.id in ids_in_manifest + + for row in my_rows: + assert ( + row["error"] == "" + ), f"Unexpected error for {row['ID']}: {row['error']}" + assert os.path.exists( + row["path"] + ), f"File not downloaded: {row['path']}" + + # AND our files are no longer in the cart after successful downloads 
+ cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert ( + file_a.id not in cart_ids + ), f"Expected {file_a.id} to be removed from cart after download" + assert ( + file_b.id not in cart_ids + ), f"Expected {file_b.id} to be removed from cart after download" + + async def test_download_list_files_multiple_versions_of_same_file( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """Cart can hold two versions of the same file and both are downloaded.""" + # GIVEN a file with two versions, both added to the cart + file = await _create_test_file(project, syn, schedule_for_cleanup) + v1_id = file.id + v1_version = file.version_number + v2_version = await _upload_new_version(file, syn, schedule_for_cleanup) + assert v2_version != v1_version, "Expected a new version number" + + items = [ + DownloadListItem(file_entity_id=v1_id, version_number=v1_version), + DownloadListItem(file_entity_id=v1_id, version_number=v2_version), + ] + await download_list_add_async(files=items, synapse_client=syn) + scheduled_for_cart_removal.extend(items) + + # WHEN I download the cart + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = await download_list_files_async( + download_location=tmpdir, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the manifest contains two rows for the same entity ID + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + rows = [r for r in reader if r["ID"] == v1_id] + + assert len(rows) == 2, f"Expected 2 rows for {v1_id}, got {len(rows)}" + versions_in_manifest = {int(r["versionNumber"]) for r in rows} + assert versions_in_manifest == { + v1_version, + v2_version, + }, f"Expected versions {v1_version} and {v2_version}, got {versions_in_manifest}" + for row in rows: + assert ( + row["path"] != "" + ), f"Missing path for version {row['versionNumber']}" + assert ( + 
row["error"] == "" + ), f"Error for version {row['versionNumber']}: {row['error']}" + + # AND our file is no longer in the cart + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert ( + v1_id not in cart_ids + ), f"Expected {v1_id} to be removed from cart after download" + + async def test_download_list_files_default_location( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_files_async() with download_location=None writes to CWD.""" + # GIVEN a cart containing one of our files + file = await _create_test_file(project, syn, schedule_for_cleanup) + await _add_to_cart(file, syn, scheduled_for_cart_removal) + + # WHEN I download with no explicit download_location (uses CWD) + with tempfile.TemporaryDirectory() as tmpdir: + original_cwd = os.getcwd() + try: + os.chdir(tmpdir) + manifest_path = await download_list_files_async( + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the manifest is written under the CWD + abs_manifest = os.path.abspath(manifest_path) + assert os.path.exists(abs_manifest) + # Normalize both paths with realpath -- on macOS /var is a + # symlink to /private/var, so tmpdir and the resolved manifest + # path can differ even when the manifest is under tmpdir. 
+ assert os.path.realpath(abs_manifest).startswith( + os.path.realpath(tmpdir) + ), f"Expected manifest under {tmpdir}, got {abs_manifest}" + + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + + file_row = next(r for r in rows if r["ID"] == file.id) + assert file_row["error"] == "" + finally: + os.chdir(original_cwd) + + async def test_download_list_files_no_version_add_is_removed_from_cart( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """A file added to the cart without a version is downloaded + successfully and removed from the cart. + """ + # GIVEN a file added to the cart without a version number + file = await _create_test_file(project, syn, schedule_for_cleanup) + item_no_version = DownloadListItem(file_entity_id=file.id) + await download_list_add_async(files=[item_no_version], synapse_client=syn) + scheduled_for_cart_removal.append(item_no_version) + + # WHEN I download the cart contents + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = await download_list_files_async( + download_location=tmpdir, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the file is downloaded successfully (no error in the manifest) + with open(manifest_path, newline="") as f: + rows = [r for r in csv.DictReader(f) if r["ID"] == file.id] + assert len(rows) == 1, f"Expected 1 row for {file.id}, got {len(rows)}" + assert ( + rows[0]["error"] == "" + ), f"Unexpected error for {file.id}: {rows[0]['error']}" + assert os.path.exists( + rows[0]["path"] + ), f"File not downloaded: {rows[0]['path']}" + + # AND the file is removed from the cart after a successful download, + # even though it was added without a version number + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert ( + file.id not in cart_ids + ), f"Expected {file.id} to be removed from cart after 
class TestDownloadListManifestAsync:
    """Integration coverage for download_list_manifest_async."""

    async def test_download_list_manifest_with_custom_csv_descriptor(
        self,
        project: Project,
        syn: Synapse,
        schedule_for_cleanup: Callable[..., None],
        scheduled_for_cart_removal: list[DownloadListItem],
    ) -> None:
        """A non-default CsvTableDescriptor shapes the generated manifest."""
        # GIVEN a carted file whose name embeds the quote character, which
        # forces the manifest writer to emit the escape character
        local_path = utils.make_bogus_uuid_file()
        schedule_for_cleanup(local_path)
        uuid_suffix = str(uuid.uuid4())
        file = File(
            parent_id=project["id"],
            path=local_path,
            name=f"it's_{uuid_suffix}",
        )
        await file.store_async(synapse_client=syn)
        schedule_for_cleanup(file.id)
        await _add_to_cart(file, syn, scheduled_for_cart_removal)

        # WHEN a manifest is requested with every descriptor option overridden
        manifest_path = await download_list_manifest_async(
            csv_table_descriptor=CsvTableDescriptor(
                separator="\t",
                quote_character="'",
                escape_character="/",
                line_end="\n",
                is_first_line_header=False,
            ),
            synapse_client=syn,
        )
        schedule_for_cleanup(manifest_path)

        # newline="" keeps the raw line endings so they can be inspected below
        with open(manifest_path, newline="") as manifest:
            content = manifest.read()

        # THEN fields are tab separated
        assert "\t" in content, "Expected tab separators in manifest"

        # AND the embedded quote in the file name was escaped with "/"
        assert "/'" in content, (
            f"Expected escape sequence /' in manifest (from escaping ' in file name), "
            f"got: {content!r}"
        )

        # AND only LF line endings occur
        assert "\r" not in content, "Expected LF-only line endings; found CR"

        # AND the first non-empty line is already data (no header row)
        non_empty_lines = [text for text in content.split("\n") if text]
        assert non_empty_lines, "Expected at least one row in the manifest"
        first_line = non_empty_lines[0]
        assert file.id in first_line, (
            f"Expected first line to be the data row containing {file.id} "
            f"(no header), got: {first_line!r}"
        )

        # AND the name field is wrapped in single quotes, because it contains
        # the quote character
        name_field = None
        for candidate in first_line.split("\t"):
            if uuid_suffix in candidate:
                name_field = candidate
                break
        assert (
            name_field is not None
        ), f"Name field containing {uuid_suffix!r} not found in {first_line!r}"
        assert name_field.startswith("'") and name_field.endswith(
            "'"
        ), f"Expected name field wrapped in single quotes, got: {name_field!r}"
class TestReadManifestRows:
    """Unit tests covering _read_manifest_rows."""

    def _write_csv(self, tmp_path: Path, header: list[str], rows: list[dict]) -> str:
        """Write ``rows`` under ``header`` to manifest.csv in ``tmp_path``; return the path."""
        target = str(tmp_path / "manifest.csv")
        with open(target, "w", newline="") as handle:
            dict_writer = csv.DictWriter(handle, fieldnames=header)
            dict_writer.writeheader()
            dict_writer.writerows(rows)
        return target

    @pytest.mark.parametrize(
        "csv_content, expected_columns, expected_row_count, row_checks",
        [
            pytest.param(
                "ID,versionNumber,name\nsyn111,1,file_a.txt\nsyn222,3,file_b.txt\n",
                ["ID", "versionNumber", "name"],
                2,
                {0: {"ID": "syn111", "versionNumber": "1"}, 1: {"ID": "syn222"}},
                id="standard_manifest",
            ),
            pytest.param(
                "ID,versionNumber\n",
                ["ID", "versionNumber"],
                0,
                {},
                id="headers_only_no_rows",
            ),
            pytest.param("", None, 0, {}, id="empty_file"),
            pytest.param(
                "ID\nsyn999\n",
                ["ID"],
                1,
                {0: {"ID": "syn999"}},
                id="single_column",
            ),
            pytest.param(
                "ID,versionNumber\nsyn123,\n",
                ["ID", "versionNumber"],
                1,
                {0: {"ID": "syn123", "versionNumber": ""}},
                id="empty_string_values_preserved",
            ),
            pytest.param(
                'ID,name\nsyn123,"file, with comma.txt"\n',
                ["ID", "name"],
                1,
                {0: {"name": "file, with comma.txt"}},
                id="quoted_field_with_comma",
            ),
        ],
    )
    def test_read_manifest_rows(
        self,
        tmp_path: Path,
        csv_content: str,
        expected_columns: list[str],
        expected_row_count: int,
        row_checks: dict[int, dict[str, str]],
    ) -> None:
        """Columns, row count, and selected cells match for each CSV shape."""
        # GIVEN a file holding exactly the parametrized CSV text
        manifest_file = str(tmp_path / "manifest.csv")
        with open(manifest_file, "w", newline="") as handle:
            handle.write(csv_content)

        # WHEN the manifest is read
        columns, rows = _read_manifest_rows(manifest_file)

        # THEN the header and the number of rows are as expected
        assert columns == expected_columns
        assert len(rows) == expected_row_count

        # AND every spot-checked cell holds the expected value
        for index, wanted_cells in row_checks.items():
            parsed = rows[index]
            for column_name in wanted_cells:
                assert parsed[column_name] == wanted_cells[column_name]

    def test_many_rows(self, tmp_path: Path) -> None:
        """A 500-row manifest is returned in full and in order."""
        # GIVEN a CSV with 500 generated rows
        header = ["ID", "versionNumber"]
        generated = []
        for number in range(500):
            generated.append({"ID": f"syn{number}", "versionNumber": str(number)})
        manifest_file = self._write_csv(tmp_path, header, generated)

        # WHEN the manifest is read
        columns, rows = _read_manifest_rows(manifest_file)

        # THEN nothing was dropped and the first/last rows are in place
        assert columns == header
        assert len(rows) == 500
        first_row, last_row = rows[0], rows[499]
        assert first_row["ID"] == "syn0"
        assert last_row["ID"] == "syn499"
class TestValidateAndExtendColumns:
    """Unit tests covering _validate_and_extend_columns."""

    @pytest.mark.parametrize(
        "columns, expected",
        [
            pytest.param(
                ["ID", "versionNumber"],
                ["ID", "versionNumber", "path", "error"],
                id="standard_columns",
            ),
            pytest.param(["ID"], ["ID", "path", "error"], id="single_column"),
            pytest.param(
                ["ID", "versionNumber", "name", "createdBy"],
                ["ID", "versionNumber", "name", "createdBy", "path", "error"],
                id="many_columns",
            ),
        ],
    )
    def test_appends_path_and_error(
        self, columns: list[str], expected: list[str]
    ) -> None:
        """Acceptable headers come back with "path" and "error" appended."""
        extended = _validate_and_extend_columns(columns)
        assert extended == expected

    def test_none_columns_raises(self) -> None:
        """Passing None (a manifest with no header) raises SynapseError."""
        with pytest.raises(SynapseError, match="no headers"):
            _validate_and_extend_columns(None)

    @pytest.mark.parametrize(
        "columns",
        [
            pytest.param(["ID", "path"], id="contains_path"),
            pytest.param(["ID", "error"], id="contains_error"),
            pytest.param(["path", "error"], id="contains_both"),
        ],
    )
    def test_reserved_column_names_raise(self, columns: list[str]) -> None:
        """Headers that already use "path" or "error" raise SynapseError."""
        with pytest.raises(SynapseError, match="reserved column names"):
            _validate_and_extend_columns(columns)


class TestDownloadListClearAsync:
    """Unit tests covering download_list_clear_async."""

    async def test_download_list_clear_async(self, syn: Synapse) -> None:
        """Clearing the cart sends one DELETE to /download/list and returns None."""
        # GIVEN the client's rest_delete_async replaced by an AsyncMock
        delete_patch = patch.object(
            syn,
            "rest_delete_async",
            new_callable=AsyncMock,
            return_value=None,
        )
        with delete_patch as mocked_delete:
            # WHEN the cart is cleared through that client
            outcome = await download_list_clear_async(synapse_client=syn)

        # THEN exactly one DELETE was awaited against /download/list
        mocked_delete.assert_awaited_once_with("/download/list")
        # AND nothing is returned
        assert outcome is None
class TestDownloadListAddAsync:
    """Unit tests covering download_list_add_async."""

    async def test_download_list_add_async(self, syn: Synapse) -> None:
        """Adding items POSTs the batch to /download/list/add and returns the count."""
        # GIVEN one versioned and one unversioned item
        batch = [
            DownloadListItem(file_entity_id="syn111", version_number=1),
            DownloadListItem(file_entity_id="syn222", version_number=None),
        ]
        # AND the client's rest_post_async replaced by an AsyncMock
        post_patch = patch.object(
            syn,
            "rest_post_async",
            new_callable=AsyncMock,
            return_value={"numberOfFilesAdded": 2},
        )
        with post_patch as mocked_post:
            # WHEN the items are added through that client
            added = await download_list_add_async(files=batch, synapse_client=syn)

        # THEN a single POST went to /download/list/add
        mocked_post.assert_awaited_once()
        awaited = mocked_post.await_args
        assert awaited.args == ("/download/list/add",)
        # AND its JSON body carries both items under "batchToAdd"
        sent_body = json.loads(awaited.kwargs["body"])
        assert sent_body == {
            "batchToAdd": [
                {"fileEntityId": "syn111", "versionNumber": 1},
                {"fileEntityId": "syn222", "versionNumber": None},
            ]
        }
        # AND the reported number of added files is returned
        assert added == 2


class TestDownloadListRemoveAsync:
    """Unit tests covering download_list_remove_async."""

    async def test_download_list_remove_async(self, syn: Synapse) -> None:
        """Removing items POSTs the batch to /download/list/remove and returns the count."""
        # GIVEN one versioned and one unversioned item
        batch = [
            DownloadListItem(file_entity_id="syn111", version_number=1),
            DownloadListItem(file_entity_id="syn222", version_number=None),
        ]
        # AND the client's rest_post_async replaced by an AsyncMock
        post_patch = patch.object(
            syn,
            "rest_post_async",
            new_callable=AsyncMock,
            return_value={"numberOfFilesRemoved": 2},
        )
        with post_patch as mocked_post:
            # WHEN the items are removed through that client
            removed = await download_list_remove_async(files=batch, synapse_client=syn)

        # THEN a single POST went to /download/list/remove
        mocked_post.assert_awaited_once()
        awaited = mocked_post.await_args
        assert awaited.args == ("/download/list/remove",)
        # AND its JSON body carries both items under "batchToRemove"
        sent_body = json.loads(awaited.kwargs["body"])
        assert sent_body == {
            "batchToRemove": [
                {"fileEntityId": "syn111", "versionNumber": 1},
                {"fileEntityId": "syn222", "versionNumber": None},
            ]
        }
        # AND the reported number of removed files is returned
        assert removed == 2
class TestDownloadListManifestAsync:
    """Unit tests covering download_list_manifest_async."""

    async def test_download_list_manifest_async(self, syn: Synapse) -> None:
        """The request is built, awaited once, and its manifest path is returned."""
        # GIVEN a stand-in request object whose job "produces" a manifest path
        manifest_path = "/tmp/manifest.csv"
        fake_request = MagicMock()
        fake_request.manifest_path = manifest_path
        fake_request.send_job_and_wait_async = AsyncMock(return_value=None)
        descriptor = CsvTableDescriptor()
        request_patch = patch(
            "synapseclient.operations.download_list_operations._DownloadListManifestRequest",
            return_value=fake_request,
        )
        with request_patch as mocked_request_cls:
            # WHEN a manifest is requested with an explicit descriptor and destination
            returned = await download_list_manifest_async(
                csv_table_descriptor=descriptor,
                destination="/tmp/out",
                synapse_client=syn,
            )

        # THEN the request class was instantiated with that descriptor
        mocked_request_cls.assert_called_once_with(csv_table_descriptor=descriptor)
        # AND the job was awaited once with the destination and the client
        fake_request.send_job_and_wait_async.assert_awaited_once_with(
            post_exchange_args={"destination": "/tmp/out"},
            synapse_client=syn,
        )
        # AND the path set on the request object is what comes back
        assert returned == manifest_path

    async def test_download_list_manifest_async_no_file_produced(
        self, syn: Synapse
    ) -> None:
        """A finished job that left manifest_path as None raises SynapseError."""
        # GIVEN a stand-in request object whose manifest_path stays None
        fake_request = MagicMock()
        fake_request.manifest_path = None
        fake_request.send_job_and_wait_async = AsyncMock(return_value=None)
        request_patch = patch(
            "synapseclient.operations.download_list_operations._DownloadListManifestRequest",
            return_value=fake_request,
        )
        # WHEN a manifest is requested THEN SynapseError is raised
        with request_patch, pytest.raises(
            SynapseError, match="no local file was produced"
        ):
            await download_list_manifest_async(synapse_client=syn)


class TestDownloadListFilesAsync:
    """Unit tests covering download_list_files_async."""

    async def test_empty_cart_propagates_synapse_http_error(self, syn: Synapse) -> None:
        """A SynapseHTTPError from the manifest step reaches the caller.

        The error is simulated by making download_list_manifest_async raise
        'No files available for download'; download_list_files_async must
        not swallow it.
        """
        # GIVEN download_list_manifest_async patched to raise SynapseHTTPError
        manifest_patch = patch(
            "synapseclient.operations.download_list_operations.download_list_manifest_async",
            new_callable=AsyncMock,
            side_effect=SynapseHTTPError("No files available for download"),
        )
        # WHEN the cart is downloaded THEN the same error type and text surface
        with manifest_patch, pytest.raises(
            SynapseHTTPError, match="No files available for download"
        ):
            await download_list_files_async(synapse_client=syn)


class TestDownloadManifestFile:
    """Unit tests covering _download_manifest_file."""

    @staticmethod
    def _fetched_file(path) -> MagicMock:
        """Return a mock standing in for the object get_async resolves to."""
        fetched = MagicMock()
        fetched.path = path
        return fetched

    @staticmethod
    def _file_cls_stub(get_async: AsyncMock) -> MagicMock:
        """Return a mock File class whose instances expose ``get_async``."""
        return MagicMock(return_value=MagicMock(get_async=get_async))

    async def test_success_annotates_row_and_returns_item(self, syn: Synapse) -> None:
        """A successful download fills in path/error on the row and returns
        a DownloadListItem for the entity id and version."""
        # GIVEN a versioned manifest row and a File stub that yields a local path
        row = {"ID": "syn111", "versionNumber": "2"}
        mock_file_cls = self._file_cls_stub(
            AsyncMock(return_value=self._fetched_file("/tmp/downloads/file_a.txt"))
        )
        with patch("synapseclient.models.file.File", mock_file_cls):
            # WHEN the row is downloaded
            item = await _download_manifest_file(
                row,
                download_location="/tmp/downloads",
                synapse_client=syn,
            )

        # THEN File was built with the version as an int and the download
        # location as its path
        mock_file_cls.assert_called_once_with(
            id="syn111",
            version_number=2,
            path="/tmp/downloads",
        )
        # AND the row now records the local path and an empty error
        assert row["path"] == "/tmp/downloads/file_a.txt"
        assert row["error"] == ""
        # AND the returned item identifies the entity and version
        assert item == DownloadListItem(file_entity_id="syn111", version_number=2)

    @pytest.mark.parametrize(
        "row",
        [
            pytest.param({"ID": "syn111"}, id="no_version_key"),
            pytest.param({"ID": "syn111", "versionNumber": ""}, id="blank_version"),
            pytest.param({"ID": "syn111", "versionNumber": None}, id="none_version"),
        ],
    )
    async def test_missing_version_fetches_latest(
        self, syn: Synapse, row: dict
    ) -> None:
        """An absent, blank, or None versionNumber reaches File as None."""
        # GIVEN a row with no usable version and a File stub that yields a path
        mock_file_cls = self._file_cls_stub(
            AsyncMock(return_value=self._fetched_file("/tmp/downloads/latest.txt"))
        )
        with patch("synapseclient.models.file.File", mock_file_cls):
            # WHEN the row is downloaded
            item = await _download_manifest_file(
                row,
                download_location="/tmp/downloads",
                synapse_client=syn,
            )

        # THEN File was built with version_number=None
        mock_file_cls.assert_called_once_with(
            id="syn111",
            version_number=None,
            path="/tmp/downloads",
        )
        # AND the row is annotated as a success
        assert row["path"] == "/tmp/downloads/latest.txt"
        assert row["error"] == ""
        # AND the returned item carries version_number=None as well
        assert item == DownloadListItem(file_entity_id="syn111", version_number=None)

    async def test_get_async_failure_annotates_row_and_returns_none(
        self, syn: Synapse
    ) -> None:
        """If get_async raises, the row records the message and None is returned."""
        # GIVEN a row and a File stub whose get_async raises
        row = {"ID": "syn999", "versionNumber": "1"}
        error_message = "boom"
        mock_file_cls = self._file_cls_stub(
            AsyncMock(side_effect=RuntimeError(error_message))
        )
        with patch("synapseclient.models.file.File", mock_file_cls):
            # WHEN the row is downloaded
            item = await _download_manifest_file(row, synapse_client=syn)

        # THEN the row has an empty path and the exception text as its error
        assert row["path"] == ""
        assert row["error"] == error_message
        # AND no item is returned for this row
        assert item is None

    async def test_file_with_no_path_sets_row_path_empty(self, syn: Synapse) -> None:
        """A fetched file whose path is None is recorded as an empty-string path."""
        # GIVEN a File stub whose fetched object has path=None
        row = {"ID": "syn111", "versionNumber": "1"}
        mock_file_cls = self._file_cls_stub(
            AsyncMock(return_value=self._fetched_file(None))
        )
        with patch("synapseclient.models.file.File", mock_file_cls):
            # WHEN the row is downloaded
            item = await _download_manifest_file(row, synapse_client=syn)

        # THEN the row's path is "" rather than None, and there is no error
        assert row["path"] == ""
        assert row["error"] == ""
        # AND an item is still returned for the successful call
        assert item == DownloadListItem(file_entity_id="syn111", version_number=1)