2424from synapseclient .operations import FileOptions , get
2525
2626TYPE_DICT = {
27- "string" : ColumnType .STRING ,
27+ "string" : ColumnType .MEDIUMTEXT ,
2828 "number" : ColumnType .DOUBLE ,
2929 "integer" : ColumnType .INTEGER ,
3030 "boolean" : ColumnType .BOOLEAN ,
3636 "boolean" : ColumnType .BOOLEAN_LIST ,
3737}
3838
# Per-item character limit passed as ``maximum_size`` when a STRING_LIST
# column is created from a JSON Schema property.
MAX_LIST_STRING_ITEM_SIZE = 100
# Item-count limit passed as ``maximum_list_length`` for every list-typed
# column created from a JSON Schema property.
MAX_LIST_LENGTH = 50
41+
3942
4043def create_json_schema_entity_view (
4144 syn : Synapse ,
@@ -199,48 +202,78 @@ def _create_columns_from_json_schema(json_schema: dict[str, Any]) -> list[Column
199202 raise ValueError (
200203 "The 'properties' field in the JSON Schema must be a dictionary."
201204 )
202- columns = []
203- for name , prop_schema in properties .items ():
204- column_type = _get_column_type_from_js_property (prop_schema )
205- maximum_size = None
206- if column_type == "STRING" :
207- maximum_size = 100
208- if column_type in LIST_TYPE_DICT .values ():
209- maximum_size = 5
210-
211- column = Column (
212- name = name ,
213- column_type = column_type ,
214- maximum_size = maximum_size ,
215- default_value = None ,
216- )
217- columns .append (column )
205+ columns = [
206+ _create_synapse_column_from_js_property (prop_schema , name )
207+ for name , prop_schema in properties .items ()
208+ ]
218209 return columns
219210
220211
def _create_synapse_column_from_js_property(
    js_property: dict[str, Any], name: str
) -> Column:
    """
    Creates a Synapse Column based on a JSON Schema property.

    The column type is derived from the property via
    `_get_column_type_from_js_property`. Size limits are only set for list
    columns: every list column gets `MAX_LIST_LENGTH` as its maximum list
    length, and string-list columns additionally get
    `MAX_LIST_STRING_ITEM_SIZE` as their maximum size. All other columns
    leave both limits as None.

    Args:
        js_property: A JSON Schema property in dict form.
        name: The name of the column.

    Returns:
        A Synapse Column based on the JSON Schema property.
    """
    column_type = _get_column_type_from_js_property(js_property)

    # Limits stay None unless the column is a list type.
    maximum_size = None
    maximum_list_length = None
    if column_type in LIST_TYPE_DICT.values():
        maximum_list_length = MAX_LIST_LENGTH
    if column_type == ColumnType.STRING_LIST:
        maximum_size = MAX_LIST_STRING_ITEM_SIZE

    return Column(
        name=name,
        column_type=column_type,
        maximum_size=maximum_size,
        maximum_list_length=maximum_list_length,
    )
239+
240+
def _get_column_type_from_js_property(js_property: dict[str, Any]) -> ColumnType:
    """
    Gets the Synapse column type from a JSON Schema property.
    The JSON Schema should be valid but that should not be assumed.
    If the type can not be determined ColumnType.MEDIUMTEXT will be returned.

    Resolution order:
      1. A property that is not a dict -> MEDIUMTEXT.
      2. A property with an "enum" key -> MEDIUMTEXT.
      3. A property with a "type" key -> mapped through TYPE_DICT (or the
         list-column helper for "array"). A list of types is accepted only
         when exactly one entry remains after dropping "null".
      4. A property with a list-valued "oneOf" key -> delegated to
         `_get_column_type_from_js_one_of_list`.
      5. Anything else -> MEDIUMTEXT.

    Args:
        js_property: A JSON Schema property in dict form.

    Returns:
        A Synapse ColumnType based on the JSON Schema type
    """
    # The schema is not assumed valid: a property that is not a dict (for
    # example a boolean schema) carries no usable type information.
    if not isinstance(js_property, dict):
        return ColumnType.MEDIUMTEXT
    # Enums are set as MediumText columns
    if "enum" in js_property:
        return ColumnType.MEDIUMTEXT
    if "type" in js_property:
        js_type = js_property["type"]
        # Synapse columns cannot be more than one type.
        # If the JSON Schema type is a list of types, check if it's a nullable single type
        if isinstance(js_type, list):
            non_null_types = [t for t in js_type if t != "null"]
            if len(non_null_types) != 1:
                return ColumnType.MEDIUMTEXT
            js_type = non_null_types[0]
        # A non-string type (e.g. a dict) is malformed and may be unhashable,
        # which would make the TYPE_DICT lookup below raise TypeError.
        if not isinstance(js_type, str):
            return ColumnType.MEDIUMTEXT
        if js_type == "array":
            return _get_list_column_type_from_js_property(js_property)
        # If there is only one JSON Schema type, return the corresponding Synapse column type,
        # defaulting to MediumText if there is no match
        return TYPE_DICT.get(js_type, ColumnType.MEDIUMTEXT)
    # A oneOf list usually indicates that the type could be one or more different things
    # Curator extension does not create the types of JSON Schemas where this is the case
    # but if it is present we will attempt to determine the type based on the items in the oneOf list.
    if "oneOf" in js_property and isinstance(js_property["oneOf"], list):
        return _get_column_type_from_js_one_of_list(js_property["oneOf"])
    return ColumnType.MEDIUMTEXT
244277
245278
246279def _get_column_type_from_js_one_of_list (js_one_of_list : list [Any ]) -> ColumnType :
@@ -258,15 +291,15 @@ def _get_column_type_from_js_one_of_list(js_one_of_list: list[Any]) -> ColumnTyp
258291 items = [item for item in js_one_of_list if isinstance (item , dict )]
259292 # Enums are always strings in Synapse tables
260293 if [item for item in items if "enum" in item ]:
261- return ColumnType .STRING
294+ return ColumnType .MEDIUMTEXT
262295 # For Synapse ColumnType we can ignore null types in JSON Schemas
263296 type_items = [item for item in items if "type" in item if item ["type" ] != "null" ]
264297 if len (type_items ) == 1 :
265298 type_item = type_items [0 ]
266299 if type_item ["type" ] == "array" :
267300 return _get_list_column_type_from_js_property (type_item )
268- return TYPE_DICT .get (type_item ["type" ], ColumnType .STRING )
269- return ColumnType .STRING
301+ return TYPE_DICT .get (type_item ["type" ], ColumnType .MEDIUMTEXT )
302+ return ColumnType .MEDIUMTEXT
270303
271304
272305def _get_list_column_type_from_js_property (js_property : dict [str , Any ]) -> ColumnType :
0 commit comments