Skip to content
110 changes: 110 additions & 0 deletions docs/guides/extensions/curator/metadata_curation.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,116 @@ else:
print("No validation results available. The Grid session must be exported to generate validation results.")
```

### Example: Getting data into a Grid for a file-based workflow

The following example is for file-based curation.
It assumes your data is in a CSV file where each column is a property.

```python
import pandas as pd
from synapseclient import Synapse
from synapseclient.models import CurationTask, Folder, File
from synapseclient.core.utils import make_bogus_data_file
from synapseclient.extensions.curator import create_file_based_metadata_task

# 1. Replace all these values with your own information
PROJECT_ID = "syn68175188"
FOLDER_NAME = "Patient Curation Folder"
CSV_PATH = "patient.csv"
Comment thread
andrewelamb marked this conversation as resolved.
Outdated
JSON_SCHEMA_URI = "dpetest-test.schematic.Patient"
CURATION_TASK_NAME = "File-based curation task for patients"
INSTRUCTIONS = "Please curate the patient information."

# 2. Login to Synapse
syn = Synapse()
syn.login()

# 3. Get annotations from CSV file
# to_dict(orient="records") yields one dict per CSV row, keyed by column name.
# TODO: these rows are attached to the files as annotations further down,
# which bypasses the Grid altogether.
annotations = pd.read_csv(CSV_PATH).to_dict(orient="records")
Copy link
Copy Markdown
Member

@thomasyu888 thomasyu888 Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This currently bypasses the grid altogether by pushing data in as annotations. Can we add a TODO comment here?

Nit: Could we leverage the entity view API to push the CSV directly into the entity view?


# 4. Create a folder to store the file that will be used for curation
folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID)
folder = folder.store(synapse_client=syn)

# 5. Create files, annotate them, and store them in the Synapse folder
Comment thread
andrewelamb marked this conversation as resolved.
Outdated
# Create one placeholder data file per CSV row and attach that row's values as
# the file's annotations. Looping over every record (instead of indexing rows
# 0 and 1) means the example works for a CSV with any number of rows.
for row_annotations in annotations:
    path_to_file = make_bogus_data_file(n=5)
    file = File(
        path=path_to_file,
        parent_id=folder.id,
        annotations=row_annotations,
    )
    file = file.store(synapse_client=syn)

# 6. Create EntityView and CurationTask
view_id, task_id = create_file_based_metadata_task(
Comment thread
andrewelamb marked this conversation as resolved.
folder_id=folder.id,
curation_task_name=CURATION_TASK_NAME,
instructions=INSTRUCTIONS,
schema_uri=JSON_SCHEMA_URI,
synapse_client=syn,
)

Comment thread
linglp marked this conversation as resolved.
# 7. Cleanup all Synapse entities created
folder.delete(synapse_client=syn)
# Only the task ID was returned above, so wrap it in a CurationTask to delete it.
# NOTE(review): delete_source=True presumably also removes the EntityView backing the task — confirm.
CurationTask(task_id=task_id).delete(synapse_client=syn, delete_source=True)
```

### Example: Getting data into a Grid for a record-based workflow

The following example is for record-based curation.
It assumes your data is in a CSV file where each column is a property.

```python
import pandas as pd

from synapseclient import Synapse
from synapseclient.models import Folder
from synapseclient.extensions.curator import create_record_based_metadata_task


# 1. Replace all these values with your own information
PROJECT_ID = "syn68175188"
FOLDER_NAME = "Patient Curation Folder"
CSV_PATH = "patient.csv"
JSON_SCHEMA_URI = "dpetest-test.schematic.Patient"
CURATION_TASK_NAME = "Record-based curation task for patients"
INSTRUCTIONS = "Please curate the patient information."
RECORD_SET_NAME = "Patient Record Set"
RECORD_SET_DESCRIPTION = "A record set for patients created for a record-based curation task example."
UPSERT_KEYS = ["PatientID"]

# 2. Login to Synapse
syn = Synapse()
syn.login()

# 3. Create a folder to store the RecordSet in
folder = Folder(name=FOLDER_NAME, parent_id=PROJECT_ID)
Comment thread
andrewelamb marked this conversation as resolved.
folder = folder.store(synapse_client=syn)

# 4. Create RecordSet, CurationTask, and Grid
record_set, task, grid = create_record_based_metadata_task(
folder_id=folder.id,
record_set_name=RECORD_SET_NAME,
record_set_description=RECORD_SET_DESCRIPTION,
curation_task_name=CURATION_TASK_NAME,
upsert_keys=UPSERT_KEYS,
instructions=INSTRUCTIONS,
schema_uri=JSON_SCHEMA_URI,
synapse_client=syn,
)

# 5. Point the record set at the CSV file and store it.
# TODO: https://sagebionetworks.jira.com/browse/SYNPY-1781
# Once SYNPY-1781 is finished add code here for uploading data from a CSV file into a grid session.
# NOTE(review): get() is called without using its return value — presumably it refreshes record_set in place; confirm.
record_set.get(synapse_client=syn)
# The CSV is assigned as the record set's file path (not as an annotation) before storing.
record_set.path = CSV_PATH
record_set = record_set.store(synapse_client=syn)

# 6. Cleanup all Synapse entities created
folder.delete(synapse_client=syn)
# Delete the curation task returned by create_record_based_metadata_task.
# delete_source is a CurationTask.delete flag (the file-based example passes it
# the same way); here it is expected to remove the task's RecordSet as well.
task.delete(synapse_client=syn, delete_source=True)
Comment thread
andrewelamb marked this conversation as resolved.
Outdated
grid.delete(synapse_client=syn)
Comment thread
andrewelamb marked this conversation as resolved.
Outdated
```

### Example: Complete validation workflow for animal study metadata

This example demonstrates the full workflow from creating a curation task through validating the submitted metadata:
Expand Down
Loading