Skip to content

Commit dcda6d0

Browse files
authored
Merge pull request #1329 from Sage-Bionetworks/SYNPY-1765
[SYNPY-1765] Remove project id arg from create_record_based_curator_task
2 parents 486a9fc + 5b72682 commit dcda6d0

6 files changed

Lines changed: 166 additions & 107 deletions

File tree

synapseclient/extensions/curator/file_based_metadata_task.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from synapseclient import Synapse # type: ignore
1111
from synapseclient import Wiki # type: ignore
1212
from synapseclient.core.exceptions import SynapseHTTPError # type: ignore
13+
from synapseclient.extensions.curator.utils import project_id_from_entity_id
1314
from synapseclient.models import ( # type: ignore
1415
Column,
1516
ColumnType,
@@ -430,27 +431,15 @@ def create_file_based_metadata_task(
430431
synapse_client.logger.info(
431432
"Attempting to get the Synapse ID of the provided folders project."
432433
)
433-
try:
434-
entity = Folder(folder_id).get(synapse_client=synapse_client)
435-
parent = synapse_client.get(entity.parent_id)
436-
project = None
437-
while not project:
438-
if parent.concreteType == "org.sagebionetworks.repo.model.Project":
439-
project = parent
440-
break
441-
parent = synapse_client.get(parent.parentId)
442-
except Exception as e:
443-
synapse_client.logger.exception(
444-
"Error getting the Synapse ID of the provided folders project"
445-
)
446-
raise e
434+
435+
project_id = project_id_from_entity_id(folder_id, synapse_client=synapse_client)
447436
synapse_client.logger.info("Got the Synapse ID of the provided folders project.")
448437

449438
synapse_client.logger.info("Attempting to create the CurationTask.")
450439
try:
451440
task = CurationTask(
452441
data_type=task_datatype,
453-
project_id=project.id,
442+
project_id=project_id,
454443
instructions=instructions,
455444
assignee_principal_id=(
456445
str(assignee_principal_id)

synapseclient/extensions/curator/record_based_metadata_task.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from synapseclient import Synapse
1212
from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE
1313
from synapseclient.core.utils import test_import_pandas
14+
from synapseclient.extensions.curator.utils import project_id_from_entity_id
1415
from synapseclient.models import (
1516
CurationTask,
1617
Grid,
@@ -99,7 +100,6 @@ def extract_schema_properties_from_web(
99100

100101

101102
def create_record_based_metadata_task(
102-
project_id: str,
103103
folder_id: str,
104104
record_set_name: str,
105105
record_set_description: str,
@@ -112,6 +112,7 @@ def create_record_based_metadata_task(
112112
assignee_principal_id: Optional[Union[str, int]] = None,
113113
*,
114114
synapse_client: Optional[Synapse] = None,
115+
project_id: Optional[str] = None, # Deprecated, will be removed in v5.0.0
115116
) -> Tuple[RecordSet, CurationTask, Grid]:
116117
"""
117118
Generate and upload CSV templates as a RecordSet for record-based metadata,
@@ -142,7 +143,6 @@ def create_record_based_metadata_task(
142143
143144
record_set, task, grid = create_record_based_metadata_task(
144145
synapse_client=syn,
145-
project_id="syn12345678",
146146
folder_id="syn87654321",
147147
record_set_name="BiospecimenMetadata_RecordSet",
148148
record_set_description="RecordSet for biospecimen metadata curation",
@@ -155,9 +155,10 @@ def create_record_based_metadata_task(
155155
```
156156
157157
Arguments:
158-
project_id: The Synapse ID of the project where the folder exists.
159158
folder_id: The Synapse ID of the folder to upload RecordSet to.
160-
record_set_name: Name for the RecordSet.
159+
record_set_name: Name for the RecordSet entity that will be created.
160+
A RecordSet entity captures record-based metadata as a special type of CSV and stores
161+
contributor-provided metadata collected via Curator, enabling sharing and download of validated metadata in Synapse.
161162
record_set_description: Description for the RecordSet.
162163
curation_task_name: Name for the CurationTask (used as data_type field).
163164
Must be unique within the project, otherwise if it matches an existing
@@ -177,6 +178,7 @@ def create_record_based_metadata_task(
177178
synapse_client: If not passed in and caching was not disabled by
178179
`Synapse.allow_client_caching(False)` this will use the last created
179180
instance from the Synapse class constructor.
181+
project_id: Deprecated and ignored; the project ID is inferred from `folder_id`. Will be removed in v5.0.0.
180182
181183
Returns:
182184
Tuple containing the created RecordSet, CurationTask, and Grid objects
@@ -186,8 +188,6 @@ def create_record_based_metadata_task(
186188
SynapseError: If there are issues with Synapse operations.
187189
"""
188190
# Validate required parameters
189-
if not project_id:
190-
raise ValueError("project_id is required")
191191
if not folder_id:
192192
raise ValueError("folder_id is required")
193193
if not record_set_name:
@@ -203,8 +203,18 @@ def create_record_based_metadata_task(
203203
if not schema_uri:
204204
raise ValueError("schema_uri is required")
205205

206+
# Resolve the client before touching its logger: `synapse_client` defaults to
# None, so logging the deprecation warning first would raise AttributeError
# whenever a caller passes `project_id` without an explicit client.
synapse_client = Synapse.get_client(synapse_client=synapse_client)

if project_id:
    synapse_client.logger.warning(
        "The 'project_id' parameter is deprecated and will be removed in v5.0.0. "
        "The project ID will be inferred from the folder ID provided."
    )
207213

214+
project_id = project_id_from_entity_id(
215+
entity_id=folder_id, synapse_client=synapse_client
216+
)
217+
208218
template_df = extract_schema_properties_from_web(
209219
syn=synapse_client, schema_uri=schema_uri
210220
)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from synapseclient import Synapse
2+
from synapseclient.models import Project
3+
from synapseclient.operations import get
4+
5+
# Safeguard against infinite loops when traversing the folder hierarchy to
# find the project ID.
MAX_HIERARCHY_DEPTH = 1000
7+
8+
9+
def project_id_from_entity_id(entity_id: str, synapse_client: Synapse) -> str:
    """
    Retrieves the project ID for a given entity ID by traversing up the hierarchy.

    The entity is fetched first; while the fetched object is not a Project, its
    parent is fetched in turn. If the entity is itself a Project, its own ID is
    returned.

    Args:
        entity_id: The Synapse ID of the entity (e.g., folder, file) to start from.
        synapse_client: Authenticated Synapse client instance

    Returns:
        The Synapse ID of the project that the entity belongs to.

    Raises:
        ValueError: If no project is reached within MAX_HIERARCHY_DEPTH parent
            lookups, or if a non-project entity in the chain has no parent ID.
    """
    current_obj = get(entity_id, synapse_client=synapse_client)
    parent_lookups = 0
    while not isinstance(current_obj, Project):
        # Check the bound before fetching so no extra request is made once the
        # limit has been reached.
        if parent_lookups >= MAX_HIERARCHY_DEPTH:
            raise ValueError("Could not find project ID in folder hierarchy")

        # Without a parent there is nothing left to walk; fail with a clear
        # message instead of requesting an entity with a missing ID.
        parent_id = getattr(current_obj, "parent_id", None)
        if not parent_id:
            raise ValueError(
                f"Could not find project ID in folder hierarchy: an ancestor of "
                f"{entity_id} has no parent ID"
            )

        current_obj = get(parent_id, synapse_client=synapse_client)
        parent_lookups += 1
    return current_obj.id

synapseclient/models/recordset.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,12 @@ def get_detailed_validation_results(
360360
@dataclass()
361361
@async_to_sync
362362
class RecordSet(RecordSetSynchronousProtocol, AccessControllable, BaseJSONSchema):
363-
"""A RecordSet within Synapse.
363+
"""
364+
A RecordSet entity captures record-based metadata as a special type of CSV.
365+
The record set content can be curated using the grid services.
366+
When a grid is created from a record set, its data can be exported back to a new version of the record set.
367+
The export will include the validation summary as well as a validation file handle that
368+
contains detailed validation results for each row in the record set.
364369
365370
Attributes:
366371
id: The unique immutable ID for this file. A new ID will be generated for new

tests/integration/synapseclient/test_schema_management.py renamed to tests/integration/synapseclient/extensions/curator/test_schema_management.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
from synapseclient import Synapse
1010
from synapseclient.extensions.curator import bind_jsonschema, register_jsonschema
11-
from synapseclient.models import File, Folder, Project, SchemaOrganization
11+
from synapseclient.extensions.curator.record_based_metadata_task import (
12+
project_id_from_entity_id,
13+
)
14+
from synapseclient.models import Folder, Project, SchemaOrganization
1215

1316

1417
def create_test_name():
@@ -259,3 +262,37 @@ def test_complete_workflow(
259262
# Cleanup: unbind schema before deleting folder
260263
folder.unbind_schema(synapse_client=syn)
261264
syn.delete(folder.id)
265+
266+
267+
class TestProjectIDFromEntityID:
    """Integration tests for project_id_from_entity_id."""

    @pytest.fixture(scope="module")
    def temp_hierarchy(self, syn: Synapse, request) -> tuple[str, str, str]:
        """Creates a Project -> Folder -> Folder hierarchy for testing.

        Returns the IDs as (project, outer folder, inner folder). A finalizer
        deletes the project when the fixture is torn down.
        """
        root_project = Project(name=create_test_name()).store(synapse_client=syn)
        outer_folder = Folder(
            name=create_test_name(), parent_id=root_project.id
        ).store(synapse_client=syn)
        inner_folder = Folder(
            name=create_test_name(), parent_id=outer_folder.id
        ).store(synapse_client=syn)

        request.addfinalizer(lambda: root_project.delete(synapse_client=syn))
        return root_project.id, outer_folder.id, inner_folder.id

    def test_project_id_from_folder(self, syn, temp_hierarchy):
        """Test finding project id when input id is from a nested folder."""
        expected_project_id, _, nested_folder_id = temp_hierarchy

        resolved_id = project_id_from_entity_id(nested_folder_id, syn)
        assert resolved_id == expected_project_id

    def test_project_id_from_project(self, syn, temp_hierarchy):
        """Test finding project id when input id is for a project"""
        project_id, _, _ = temp_hierarchy

        resolved_id = project_id_from_entity_id(project_id, syn)
        assert resolved_id == project_id

0 commit comments

Comments
 (0)