@@ -90,12 +90,15 @@ class GenericTable:
9090 UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided.
9191 """ # noqa: E501
9292
93- def __init__ (self , table_id : str , db_type : str = "bigquery" ) -> None :
93+ def __init__ (
94+ self , table_id : str , db_type : str = "bigquery" , dataform_table : dict = None
95+ ) -> None :
9496 """Initializes the `GenericTable` object based on the database type.
9597
9698 Args:
9799 table_id: The full ID of the table in the database.
98100 db_type: The type of the database ("bigquery" currently supported).
101+ dataform_table: Optional dataform table dictionary containing metadata.
99102
100103 Raises:
101104 UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided.
@@ -106,30 +109,51 @@ def __init__(self, table_id: str, db_type: str = "bigquery") -> None:
106109 self .table_id = table_id
107110 self .table_name = self .__table .table_name
108111 self .__db_type = db_type
109- # TODO implement self.description = self.__table.description
110- # This is not implemented at the moment lkml views don't support descriptions
111- # At the moment the dictionary for views and dimensions are built
112- # this is because the lkml lib requires the dict
113- # in case something different is used then we would need to
114- # re-factor the dictionary for GenericTable and Column
115- self .dimensions = [
116- column .column_dictionary
117- for column in self .__table .columns
118- if column .dimension_type == "dimension"
119- ]
112+
113+ column_descriptions = {}
114+ if dataform_table and "actionDescriptor" in dataform_table :
115+ # We can store the table_description if needed later for view description
116+ self .description = dataform_table ["actionDescriptor" ].get ("description" , "" )
117+ for col in dataform_table ["actionDescriptor" ].get ("columns" , []):
118+ col_name = "." .join (col ["path" ])
119+ column_descriptions [col_name ] = col .get ("description" , "" )
120+
121+ self .dimensions = []
122+ for column in self .__table .columns :
123+ if column .dimension_type == "dimension" :
124+ col_dict = column .column_dictionary .copy ()
125+ if col_dict ["name" ] in column_descriptions :
126+ col_dict ["description" ] = column_descriptions [col_dict ["name" ]]
127+ self .dimensions .append (col_dict )
128+
120129 logging .debug (f"Dimensions for table { self .table_name } : { self .dimensions } " )
121- self .dimension_group = [
122- column .column_dictionary
123- for column in self .__table .columns
124- if column .dimension_type == "time_dimension_group"
125- ]
130+
131+ self .dimension_group = []
132+ for column in self .__table .columns :
133+ if column .dimension_type == "time_dimension_group" :
134+ col_dict = column .column_dictionary .copy ()
135+ if col_dict ["name" ] in column_descriptions :
136+ col_dict ["description" ] = column_descriptions [col_dict ["name" ]]
137+ self .dimension_group .append (col_dict )
126138 logging .debug (
127139 f"Dimensions Group for table { self .table_name } : { self .dimension_group } "
128140 )
129141 self .measures = [{"type" : "count" , "name" : "count" }]
130- # TODO it should be possible to include other measures by passing an argument
131- # Include measures if needed such as sums of all number dimensions
132- # include count_distinct
142+ for dim in self .dimensions :
143+ if dim ["type" ] == "number" :
144+ dim_name = dim ["name" ]
145+ self .measures .append ({
146+ "type" : "sum" ,
147+ "name" : f"total_{ dim_name } " ,
148+ "sql" : f"${{{ dim_name } }}" ,
149+ "description" : f"Total of { dim_name } " ,
150+ })
151+ self .measures .append ({
152+ "type" : "average" ,
153+ "name" : f"average_{ dim_name } " ,
154+ "sql" : f"${{{ dim_name } }}" ,
155+ "description" : f"Average of { dim_name } " ,
156+ })
133157
134158 self .table_dictionary = {
135159 "view" : {
@@ -230,14 +254,25 @@ def __get_columns(self) -> list[Column]:
230254 client = bigquery .Client ()
231255 table = client .get_table (self .table_id )
232256 logging .debug (f"Got table schema from table { self .table_id } " )
233- columns = [
234- Column (
235- name = field .name ,
236- description = field .description ,
237- field_type = self ._LOOKER_TYPE_MAP [field .field_type ],
238- data_type = field .field_type .lower (),
239- time_frames = self ._TIME_FRAMES_MAP .get (field .field_type , None ),
240- )
241- for field in table .schema
242- ]
257+ columns = []
258+
def process_fields(fields: list, prefix: str = "") -> None:
    """Recursively flatten a BigQuery schema into `columns` (closure).

    Leaf fields are appended to the enclosing `columns` list as `Column`
    objects; nested record fields are recursed into, building dotted
    names (e.g. ``parent.child``) that match the dotted paths used for
    dataform column descriptions.

    Args:
        fields: BigQuery ``SchemaField`` objects at the current level.
        prefix: Dotted path of the enclosing record(s), ending with a
            ``.`` separator; empty at the top level.
    """
    for field in fields:
        name = f"{prefix}{field.name}"
        # BigQuery's STRUCT type is an alias of RECORD; the API may report
        # either, so accept both or nested STRUCT fields would be emitted
        # as scalar "string" dimensions instead of being flattened.
        if field.field_type in ("RECORD", "STRUCT"):
            process_fields(field.fields, prefix=f"{name}.")
        else:
            # Unknown/unmapped types deliberately fall back to "string"
            # instead of raising, so new BigQuery types don't break generation.
            looker_type = self._LOOKER_TYPE_MAP.get(field.field_type, "string")
            # Only date/time types have time frames; others get None.
            time_frames = self._TIME_FRAMES_MAP.get(field.field_type, None)
            # NOTE(review): REPEATED fields are flattened exactly like
            # scalars here — confirm that is the intended LookML shape.
            columns.append(
                Column(
                    name=name,
                    description=field.description,
                    field_type=looker_type,
                    data_type=field.field_type.lower(),
                    time_frames=time_frames,
                )
            )
276+
277+ process_fields (table .schema )
243278 return columns
0 commit comments