Skip to content

Commit a8519aa

Browse files
committed
change to use synapse defaults
1 parent 7cc1cd7 commit a8519aa

3 files changed

Lines changed: 122 additions & 182 deletions

File tree

synapseclient/extensions/curator/file_based_metadata_task.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@
3636
"boolean": ColumnType.BOOLEAN_LIST,
3737
}
3838

39-
MAX_LIST_STRING_ITEM_SIZE = 100
40-
MAX_LIST_LENGTH = 50
41-
4239

4340
def create_json_schema_entity_view(
4441
syn: Synapse,
@@ -223,19 +220,7 @@ def _create_synapse_column_from_js_property(
223220
A Synapse Column based on the JSON Schema property.
224221
"""
225222
column_type = _get_column_type_from_js_property(js_property)
226-
maximum_size = None
227-
maximum_list_length = None
228-
if column_type in LIST_TYPE_DICT.values():
229-
maximum_list_length = MAX_LIST_LENGTH
230-
if column_type == ColumnType.STRING_LIST:
231-
maximum_size = MAX_LIST_STRING_ITEM_SIZE
232-
233-
return Column(
234-
name=name,
235-
column_type=column_type,
236-
maximum_size=maximum_size,
237-
maximum_list_length=maximum_list_length,
238-
)
223+
return Column(name=name, column_type=column_type)
239224

240225

241226
def _get_column_type_from_js_property(js_property: dict[str, Any]) -> ColumnType:

tests/unit/synapseclient/extensions/test_file_based_metadata_task.py

Lines changed: 0 additions & 117 deletions
This file was deleted.

tests/unit/synapseclient/extensions/unit_test_curator.py

Lines changed: 121 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import shutil
1313
import tempfile
1414
import unittest
15+
from typing import Any
1516
from unittest.mock import Mock, mock_open, patch
1617

1718
import pandas as pd
@@ -26,6 +27,7 @@
2627
)
2728
from synapseclient.extensions.curator.file_based_metadata_task import (
2829
_create_columns_from_json_schema,
30+
_create_synapse_column_from_js_property,
2931
_get_column_type_from_js_one_of_list,
3032
_get_column_type_from_js_property,
3133
_get_list_column_type_from_js_property,
@@ -50,7 +52,7 @@
5052
SchemaRegistryColumnConfig,
5153
get_latest_schema_uri,
5254
)
53-
from synapseclient.models import ColumnType
55+
from synapseclient.models import Column, ColumnType
5456
from synapseclient.models.curation import (
5557
FileBasedMetadataTaskProperties,
5658
RecordBasedMetadataTaskProperties,
@@ -1670,50 +1672,6 @@ def test_create_columns_from_json_schema_success(self):
16701672
assert all(hasattr(col, "name") for col in columns)
16711673
assert all(hasattr(col, "column_type") for col in columns)
16721674

1673-
def test_get_column_type_from_js_property_enum(self):
1674-
"""Test getting column type for enum property."""
1675-
# GIVEN a JSON schema property with an enum
1676-
js_property = {"enum": ["option1", "option2", "option3"]}
1677-
1678-
# WHEN I get the column type
1679-
result = _get_column_type_from_js_property(js_property)
1680-
1681-
# THEN it should return STRING type
1682-
assert result == ColumnType.STRING
1683-
1684-
def test_get_column_type_from_js_property_array(self):
1685-
"""Test getting column type for array property."""
1686-
# GIVEN a JSON schema property with array type
1687-
js_property = {"type": "array", "items": {"type": "string"}}
1688-
1689-
# WHEN I get the column type
1690-
result = _get_column_type_from_js_property(js_property)
1691-
1692-
# THEN it should return a list type
1693-
assert result == ColumnType.STRING_LIST
1694-
1695-
def test_get_column_type_from_js_property_one_of(self):
1696-
"""Test getting column type for oneOf property."""
1697-
# GIVEN a JSON schema property with oneOf
1698-
js_property = {"oneOf": [{"type": "string"}, {"type": "null"}]}
1699-
1700-
# WHEN I get the column type
1701-
result = _get_column_type_from_js_property(js_property)
1702-
1703-
# THEN it should return STRING type
1704-
assert result == ColumnType.STRING
1705-
1706-
def test_get_column_type_from_js_property_fallback(self):
1707-
"""Test getting column type fallback to STRING."""
1708-
# GIVEN a JSON schema property without recognizable type
1709-
js_property = {"description": "some property"}
1710-
1711-
# WHEN I get the column type
1712-
result = _get_column_type_from_js_property(js_property)
1713-
1714-
# THEN it should return STRING type as fallback
1715-
assert result == ColumnType.STRING
1716-
17171675
def test_get_column_type_from_js_one_of_list_with_enum(self):
17181676
"""Test getting column type from oneOf list containing enum."""
17191677
# GIVEN a oneOf list with an enum
@@ -1722,8 +1680,8 @@ def test_get_column_type_from_js_one_of_list_with_enum(self):
17221680
# WHEN I get the column type
17231681
result = _get_column_type_from_js_one_of_list(js_one_of_list)
17241682

1725-
# THEN it should return STRING type
1726-
assert result == ColumnType.STRING
1683+
# THEN it should return MEDIUMTEXT type
1684+
assert result == ColumnType.MEDIUMTEXT
17271685

17281686
def test_get_column_type_from_js_one_of_list_single_type(self):
17291687
"""Test getting column type from oneOf list with single non-null type."""
@@ -1758,8 +1716,8 @@ def test_get_column_type_from_js_one_of_list_fallback(self):
17581716
# WHEN I get the column type
17591717
result = _get_column_type_from_js_one_of_list(js_one_of_list)
17601718

1761-
# THEN it should return STRING type as fallback
1762-
assert result == ColumnType.STRING
1719+
# THEN it should return MEDIUMTEXT type as fallback
1720+
assert result == ColumnType.MEDIUMTEXT
17631721

17641722
def test_get_list_column_type_from_js_property_with_enum(self):
17651723
"""Test getting list column type for property with enum items."""
@@ -1795,6 +1753,120 @@ def test_get_list_column_type_from_js_property_fallback(self):
17951753
assert result == ColumnType.STRING_LIST
17961754

17971755

1756+
@pytest.mark.parametrize(
1757+
"schema, expected",
1758+
[
1759+
(
1760+
{
1761+
"properties": {
1762+
"string_col": {"type": "string"},
1763+
}
1764+
},
1765+
[
1766+
Column(name="string_col", column_type=ColumnType.MEDIUMTEXT),
1767+
],
1768+
),
1769+
(
1770+
{
1771+
"properties": {
1772+
"string_col": {"type": "string"},
1773+
"int_col": {"type": "integer"},
1774+
"bool_col": {"type": "boolean"},
1775+
}
1776+
},
1777+
[
1778+
Column(name="string_col", column_type=ColumnType.MEDIUMTEXT),
1779+
Column(name="int_col", column_type=ColumnType.INTEGER),
1780+
Column(name="bool_col", column_type=ColumnType.BOOLEAN),
1781+
],
1782+
),
1783+
],
1784+
ids=["one column", "three columns"],
1785+
)
1786+
def test_create_columns_from_json_schema(
1787+
schema: dict[str, Any], expected: list[Column]
1788+
):
1789+
assert _create_columns_from_json_schema(schema) == expected
1790+
1791+
1792+
@pytest.mark.parametrize(
1793+
"prop, name, expected_type",
1794+
[
1795+
(
1796+
{"type": "array", "items": {"type": "string"}},
1797+
"string_list_col",
1798+
ColumnType.STRING_LIST,
1799+
),
1800+
(
1801+
{"type": "array", "items": {"type": "integer"}},
1802+
"int_list_col",
1803+
ColumnType.INTEGER_LIST,
1804+
),
1805+
(
1806+
{"type": "array", "items": {"type": "boolean"}},
1807+
"bool_list_col",
1808+
ColumnType.BOOLEAN_LIST,
1809+
),
1810+
(
1811+
{"type": "string"},
1812+
"string_col",
1813+
ColumnType.MEDIUMTEXT,
1814+
),
1815+
],
1816+
ids=["string_list", "integer_list", "boolean_list", "string"],
1817+
)
1818+
def test_create_synapse_column_from_js_property(
1819+
prop: dict[str, Any], name: str, expected_type: ColumnType
1820+
):
1821+
result = _create_synapse_column_from_js_property(prop, name)
1822+
assert isinstance(result, Column)
1823+
assert result.name == name
1824+
assert result.column_type == expected_type
1825+
1826+
1827+
@pytest.mark.parametrize(
1828+
"schema",
1829+
[{}, {"properties": []}],
1830+
ids=["empty schema", "properties is not a dict"],
1831+
)
1832+
def test_create_columns_from_json_schema_exceptions(schema: dict[str, Any]):
1833+
with pytest.raises(ValueError):
1834+
_create_columns_from_json_schema(schema)
1835+
1836+
1837+
@pytest.mark.parametrize(
1838+
"prop, expected",
1839+
[
1840+
({"enum": ["a", "b", "c"]}, ColumnType.MEDIUMTEXT),
1841+
({"type": "string"}, ColumnType.MEDIUMTEXT),
1842+
({"type": "integer"}, ColumnType.INTEGER),
1843+
({"type": "number"}, ColumnType.DOUBLE),
1844+
({"type": "boolean"}, ColumnType.BOOLEAN),
1845+
({"type": ["integer", "null"]}, ColumnType.INTEGER),
1846+
({"type": ["integer", "string"]}, ColumnType.MEDIUMTEXT),
1847+
({"type": "array", "items": {"type": "integer"}}, ColumnType.INTEGER_LIST),
1848+
({"oneOf": [{"type": "integer"}, {"type": "null"}]}, ColumnType.INTEGER),
1849+
({"type": "unknown"}, ColumnType.MEDIUMTEXT),
1850+
({}, ColumnType.MEDIUMTEXT),
1851+
],
1852+
ids=[
1853+
"enum_property",
1854+
"type_string",
1855+
"type_integer",
1856+
"type_number",
1857+
"type_boolean",
1858+
"type_list_nullable",
1859+
"type_list_multiple_types",
1860+
"type_array",
1861+
"one_of_list",
1862+
"unknown_type",
1863+
"empty_property",
1864+
],
1865+
)
1866+
def test_get_column_type_from_js_property(prop: dict[str, Any], expected: ColumnType):
1867+
assert _get_column_type_from_js_property(prop) == expected
1868+
1869+
17981870
class TestGetLatestSchemaUri(unittest.TestCase):
17991871
"""Test cases for get_latest_schema_uri function."""
18001872

0 commit comments

Comments
 (0)