1212import shutil
1313import tempfile
1414import unittest
15+ from typing import Any
1516from unittest .mock import Mock , mock_open , patch
1617
1718import pandas as pd
2627)
2728from synapseclient .extensions .curator .file_based_metadata_task import (
2829 _create_columns_from_json_schema ,
30+ _create_synapse_column_from_js_property ,
2931 _get_column_type_from_js_one_of_list ,
3032 _get_column_type_from_js_property ,
3133 _get_list_column_type_from_js_property ,
5052 SchemaRegistryColumnConfig ,
5153 get_latest_schema_uri ,
5254)
53- from synapseclient .models import ColumnType
55+ from synapseclient .models import Column , ColumnType
5456from synapseclient .models .curation import (
5557 FileBasedMetadataTaskProperties ,
5658 RecordBasedMetadataTaskProperties ,
@@ -1670,50 +1672,6 @@ def test_create_columns_from_json_schema_success(self):
16701672 assert all (hasattr (col , "name" ) for col in columns )
16711673 assert all (hasattr (col , "column_type" ) for col in columns )
16721674
1673- def test_get_column_type_from_js_property_enum (self ):
1674- """Test getting column type for enum property."""
1675- # GIVEN a JSON schema property with an enum
1676- js_property = {"enum" : ["option1" , "option2" , "option3" ]}
1677-
1678- # WHEN I get the column type
1679- result = _get_column_type_from_js_property (js_property )
1680-
1681- # THEN it should return STRING type
1682- assert result == ColumnType .STRING
1683-
1684- def test_get_column_type_from_js_property_array (self ):
1685- """Test getting column type for array property."""
1686- # GIVEN a JSON schema property with array type
1687- js_property = {"type" : "array" , "items" : {"type" : "string" }}
1688-
1689- # WHEN I get the column type
1690- result = _get_column_type_from_js_property (js_property )
1691-
1692- # THEN it should return a list type
1693- assert result == ColumnType .STRING_LIST
1694-
1695- def test_get_column_type_from_js_property_one_of (self ):
1696- """Test getting column type for oneOf property."""
1697- # GIVEN a JSON schema property with oneOf
1698- js_property = {"oneOf" : [{"type" : "string" }, {"type" : "null" }]}
1699-
1700- # WHEN I get the column type
1701- result = _get_column_type_from_js_property (js_property )
1702-
1703- # THEN it should return STRING type
1704- assert result == ColumnType .STRING
1705-
1706- def test_get_column_type_from_js_property_fallback (self ):
1707- """Test getting column type fallback to STRING."""
1708- # GIVEN a JSON schema property without recognizable type
1709- js_property = {"description" : "some property" }
1710-
1711- # WHEN I get the column type
1712- result = _get_column_type_from_js_property (js_property )
1713-
1714- # THEN it should return STRING type as fallback
1715- assert result == ColumnType .STRING
1716-
17171675 def test_get_column_type_from_js_one_of_list_with_enum (self ):
17181676 """Test getting column type from oneOf list containing enum."""
17191677 # GIVEN a oneOf list with an enum
@@ -1722,8 +1680,8 @@ def test_get_column_type_from_js_one_of_list_with_enum(self):
17221680 # WHEN I get the column type
17231681 result = _get_column_type_from_js_one_of_list (js_one_of_list )
17241682
1725- # THEN it should return STRING type
1726- assert result == ColumnType .STRING
1683+ # THEN it should return MEDIUMTEXT type
1684+ assert result == ColumnType .MEDIUMTEXT
17271685
17281686 def test_get_column_type_from_js_one_of_list_single_type (self ):
17291687 """Test getting column type from oneOf list with single non-null type."""
@@ -1758,8 +1716,8 @@ def test_get_column_type_from_js_one_of_list_fallback(self):
17581716 # WHEN I get the column type
17591717 result = _get_column_type_from_js_one_of_list (js_one_of_list )
17601718
1761- # THEN it should return STRING type as fallback
1762- assert result == ColumnType .STRING
1719+ # THEN it should return MEDIUMTEXT type as fallback
1720+ assert result == ColumnType .MEDIUMTEXT
17631721
17641722 def test_get_list_column_type_from_js_property_with_enum (self ):
17651723 """Test getting list column type for property with enum items."""
@@ -1795,6 +1753,120 @@ def test_get_list_column_type_from_js_property_fallback(self):
17951753 assert result == ColumnType .STRING_LIST
17961754
17971755
1756+ @pytest .mark .parametrize (
1757+ "schema, expected" ,
1758+ [
1759+ (
1760+ {
1761+ "properties" : {
1762+ "string_col" : {"type" : "string" },
1763+ }
1764+ },
1765+ [
1766+ Column (name = "string_col" , column_type = ColumnType .MEDIUMTEXT ),
1767+ ],
1768+ ),
1769+ (
1770+ {
1771+ "properties" : {
1772+ "string_col" : {"type" : "string" },
1773+ "int_col" : {"type" : "integer" },
1774+ "bool_col" : {"type" : "boolean" },
1775+ }
1776+ },
1777+ [
1778+ Column (name = "string_col" , column_type = ColumnType .MEDIUMTEXT ),
1779+ Column (name = "int_col" , column_type = ColumnType .INTEGER ),
1780+ Column (name = "bool_col" , column_type = ColumnType .BOOLEAN ),
1781+ ],
1782+ ),
1783+ ],
1784+ ids = ["one column" , "three columns" ],
1785+ )
1786+ def test_create_columns_from_json_schema (
1787+ schema : dict [str , Any ], expected : list [Column ]
1788+ ):
1789+ assert _create_columns_from_json_schema (schema ) == expected
1790+
1791+
1792+ @pytest .mark .parametrize (
1793+ "prop, name, expected_type" ,
1794+ [
1795+ (
1796+ {"type" : "array" , "items" : {"type" : "string" }},
1797+ "string_list_col" ,
1798+ ColumnType .STRING_LIST ,
1799+ ),
1800+ (
1801+ {"type" : "array" , "items" : {"type" : "integer" }},
1802+ "int_list_col" ,
1803+ ColumnType .INTEGER_LIST ,
1804+ ),
1805+ (
1806+ {"type" : "array" , "items" : {"type" : "boolean" }},
1807+ "bool_list_col" ,
1808+ ColumnType .BOOLEAN_LIST ,
1809+ ),
1810+ (
1811+ {"type" : "string" },
1812+ "string_col" ,
1813+ ColumnType .MEDIUMTEXT ,
1814+ ),
1815+ ],
1816+ ids = ["string_list" , "integer_list" , "boolean_list" , "string" ],
1817+ )
1818+ def test_create_synapse_column_from_js_property (
1819+ prop : dict [str , Any ], name : str , expected_type : ColumnType
1820+ ):
1821+ result = _create_synapse_column_from_js_property (prop , name )
1822+ assert isinstance (result , Column )
1823+ assert result .name == name
1824+ assert result .column_type == expected_type
1825+
1826+
1827+ @pytest .mark .parametrize (
1828+ "schema" ,
1829+ [{}, {"properties" : []}],
1829+ ids = ["empty schema" , "properties is not a dict" ],
1831+ )
1832+ def test_create_columns_from_json_schema_exceptions (schema : dict [str , Any ]):
1833+ with pytest .raises (ValueError ):
1834+ _create_columns_from_json_schema (schema )
1835+
1836+
1837+ @pytest .mark .parametrize (
1838+ "prop, expected" ,
1839+ [
1840+ ({"enum" : ["a" , "b" , "c" ]}, ColumnType .MEDIUMTEXT ),
1841+ ({"type" : "string" }, ColumnType .MEDIUMTEXT ),
1842+ ({"type" : "integer" }, ColumnType .INTEGER ),
1843+ ({"type" : "number" }, ColumnType .DOUBLE ),
1844+ ({"type" : "boolean" }, ColumnType .BOOLEAN ),
1845+ ({"type" : ["integer" , "null" ]}, ColumnType .INTEGER ),
1846+ ({"type" : ["integer" , "string" ]}, ColumnType .MEDIUMTEXT ),
1847+ ({"type" : "array" , "items" : {"type" : "integer" }}, ColumnType .INTEGER_LIST ),
1848+ ({"oneOf" : [{"type" : "integer" }, {"type" : "null" }]}, ColumnType .INTEGER ),
1849+ ({"type" : "unknown" }, ColumnType .MEDIUMTEXT ),
1850+ ({}, ColumnType .MEDIUMTEXT ),
1851+ ],
1852+ ids = [
1853+ "enum_property" ,
1854+ "type_string" ,
1855+ "type_integer" ,
1856+ "type_number" ,
1857+ "type_boolean" ,
1858+ "type_list_nullable" ,
1859+ "type_list_multiple_types" ,
1860+ "type_array" ,
1861+ "one_of_list" ,
1862+ "unknown_type" ,
1863+ "empty_property" ,
1864+ ],
1865+ )
1866+ def test_get_column_type_from_js_property (prop : dict [str , Any ], expected : ColumnType ):
1867+ assert _get_column_type_from_js_property (prop ) == expected
1868+
1869+
17981870class TestGetLatestSchemaUri (unittest .TestCase ):
17991871 """Test cases for get_latest_schema_uri function."""
18001872
0 commit comments