11from typing import List , Dict , Optional , Any
2- from pydantic import BaseModel
2+ from pydantic import BaseModel , Field , StrictInt , StrictStr
33from enum import Enum
44import pandas as pd
55from json2table import convert
@@ -58,6 +58,7 @@ def _repr_html_(self):
5858
5959# END Classes for Foundry Data Package Specification
6060
61+
6162class FoundryDatasetType (Enum ):
6263 """Foundry Dataset Types
6364 Enumeration of the possible Foundry dataset types
@@ -70,23 +71,40 @@ class FoundryDatasetType(Enum):
7071
7172
class FoundryKeyClass(BaseModel):
    """Mapping from a label found in the data to the name it stands for.

    Both fields are required and use strict string validation, so
    non-string values are rejected rather than coerced.
    """

    label: StrictStr = Field(..., description="The label that exists in the data")
    name: StrictStr = Field(..., description="The name the label maps onto.")
7576
7677
class FoundryKey(BaseModel):
    """Description of one key used to load data from a dataset.

    ``key`` and ``type`` are required. The remaining fields are optional
    and default to ``None`` explicitly, so the model stays optional-friendly
    on pydantic releases where a bare ``Optional[...]`` annotation no longer
    implies a default.
    """

    key: List[StrictStr] = Field(
        ...,
        description="Column or header name for tabular data, key/path for HDF5 data",
    )
    # NOTE: ``type`` and ``filter`` shadow builtins inside the class body, but
    # they are part of the public schema and must keep these names.
    type: StrictStr = Field(..., description="Whether input or target")
    classes: Optional[List[FoundryKeyClass]] = None
    description: Optional[StrictStr] = None
    filter: Optional[StrictStr] = None
    units: Optional[StrictStr] = None
8485
8586
class FoundrySplit(BaseModel):
    """One partition of a dataset.

    ``type`` is required; ``path`` and ``label`` are optional and default
    to ``None`` explicitly.
    """

    type: StrictStr = Field(
        ...,
        description="The kind of partition of the dataset (train, test, validation, etc)",
    )
    path: Optional[StrictStr] = None
    label: Optional[StrictStr] = None
91+
92+
class FoundryMetadata(BaseModel):
    """Foundry Dataset

    Schema for Foundry Datasets. This includes specifications of inputs,
    outputs, type, version, and more.

    ``data_type``, ``domain`` and ``keys`` are required. Every other field is
    optional and defaults to ``None`` explicitly.
    """

    data_type: FoundryDatasetType = Field(
        ...,
        description="The kind of data in the dataset, e.g. tabular, json, hdf5",
    )
    domain: List[StrictStr] = Field(
        ...,
        description="The domain of applicability. e.g., materials science, chemistry, machine vision",
    )
    keys: List[FoundryKey] = Field(
        ..., description="Keys describing how to load the data"
    )
    # Untyped on purpose (``Any``); presumably holds a pandas DataFrame --
    # TODO confirm against callers.
    dataframe: Optional[Any] = None
    n_items: Optional[StrictInt] = None
    short_name: Optional[StrictStr] = None
    splits: Optional[List[FoundrySplit]] = None
    task_type: Optional[List[StrictStr]] = None

    class Config:
        # Permit field types that pydantic has no built-in validator for.
        arbitrary_types_allowed = True
90108
91109
92110class FoundryDataset (BaseModel ):
@@ -134,7 +152,7 @@ def _repr_html_(self):
134152 return convert (json .loads (self .json ()))
135153
136154
137- class FoundryMetadata (BaseModel ):
155+ class FoundryBase (BaseModel ):
138156 dc : Optional [Dict ] = {} # pydantic Datacite?
139157 mdf : Optional [Dict ] = {}
140158 dataset : FoundryDataset = {}
0 commit comments