feat: LookML feature parity and improved logging

edgardevo · edgardevo · commit 9b6a80471bdc · 2026-03-17T21:45:09.000+01:00
diff --git a/dataform2looker/database_mappers.py b/dataform2looker/database_mappers.py
@@ -90,12 +90,15 @@ class GenericTable:
         UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided.
     """  # noqa: E501
 
-    def __init__(self, table_id: str, db_type: str = "bigquery") -> None:
+    def __init__(
+        self, table_id: str, db_type: str = "bigquery", dataform_table: dict = None
+    ) -> None:
         """Initializes the `GenericTable` object based on the database type.
 
         Args:
             table_id: The full ID of the table in the database.
             db_type: The type of the database ("bigquery" currently supported).
+            dataform_table: Optional dataform table dictionary containing metadata.
 
         Raises:
             UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided.
@@ -106,30 +109,51 @@ def __init__(self, table_id: str, db_type: str = "bigquery") -> None:
         self.table_id = table_id
         self.table_name = self.__table.table_name
         self.__db_type = db_type
-        # TODO implement self.description = self.__table.description
-        # This is not implemented at the moment lkml views don't support descriptions
-        # At the moment the dictionary for views and dimensions are built
-        # this is because the lkml lib requires the dict
-        # in case something different is used then we would need to
-        # re-factor the dictionary for GenericTable and Column
-        self.dimensions = [
-            column.column_dictionary
-            for column in self.__table.columns
-            if column.dimension_type == "dimension"
-        ]
+
+        # Default to "" so self.description exists even without Dataform metadata.
+        descriptor = (dataform_table or {}).get("actionDescriptor") or {}
+        self.description = descriptor.get("description", "")
+        column_descriptions = {
+            ".".join(col["path"]): col.get("description", "")
+            for col in descriptor.get("columns") or []
+        }
+
+        self.dimensions = []
+        for column in self.__table.columns:
+            if column.dimension_type == "dimension":
+                col_dict = column.column_dictionary.copy()
+                if col_dict["name"] in column_descriptions:
+                    col_dict["description"] = column_descriptions[col_dict["name"]]
+                self.dimensions.append(col_dict)
+
         logging.debug(f"Dimensions for table {self.table_name}: {self.dimensions}")
-        self.dimension_group = [
-            column.column_dictionary
-            for column in self.__table.columns
-            if column.dimension_type == "time_dimension_group"
-        ]
+
+        self.dimension_group = []
+        for column in self.__table.columns:
+            if column.dimension_type == "time_dimension_group":
+                col_dict = column.column_dictionary.copy()
+                if col_dict["name"] in column_descriptions:
+                    col_dict["description"] = column_descriptions[col_dict["name"]]
+                self.dimension_group.append(col_dict)
         logging.debug(
             f"Dimensions Group for table {self.table_name}: {self.dimension_group}"
         )
         self.measures = [{"type": "count", "name": "count"}]
-        # TODO it should be possible to include other measures by passing an argument
-        # Include measures if needed such as sums of all number dimensions
-        # include count_distinct
+        for dim in self.dimensions:
+            if dim["type"] == "number":
+                dim_name = dim["name"]
+                self.measures.append({
+                    "type": "sum",
+                    "name": f"total_{dim_name}",
+                    "sql": f"${{{dim_name}}}",
+                    "description": f"Total of {dim_name}",
+                })
+                self.measures.append({
+                    "type": "average",
+                    "name": f"average_{dim_name}",
+                    "sql": f"${{{dim_name}}}",
+                    "description": f"Average of {dim_name}",
+                })
 
         self.table_dictionary = {
             "view": {
@@ -230,14 +254,25 @@ def __get_columns(self) -> list[Column]:
         client = bigquery.Client()
         table = client.get_table(self.table_id)
         logging.debug(f"Got table schema from table {self.table_id}")
-        columns = [
-            Column(
-                name=field.name,
-                description=field.description,
-                field_type=self._LOOKER_TYPE_MAP[field.field_type],
-                data_type=field.field_type.lower(),
-                time_frames=self._TIME_FRAMES_MAP.get(field.field_type, None),
-            )
-            for field in table.schema
-        ]
+        columns = []
+
+        def process_fields(fields: list, prefix: str = "") -> None:
+            for field in fields:
+                name = f"{prefix}{field.name}"
+                if field.field_type == "RECORD":
+                    process_fields(field.fields, prefix=f"{name}.")
+                else:
+                    looker_type = self._LOOKER_TYPE_MAP.get(field.field_type, "string")
+                    time_frames = self._TIME_FRAMES_MAP.get(field.field_type, None)
+                    columns.append(
+                        Column(
+                            name=name,
+                            description=field.description,
+                            field_type=looker_type,
+                            data_type=field.field_type.lower(),
+                            time_frames=time_frames,
+                        )
+                    )
+
+        process_fields(table.schema)
         return columns
diff --git a/dataform2looker/dataform2looker.py b/dataform2looker/dataform2looker.py
@@ -10,20 +10,37 @@
 from dataform2looker.lookml import LookML
 
 
-def _generate_view(path_to_json_file: str, target_dir: str, tags: set[str]) -> int:
+def _generate_view(
+    path_to_json_file: str,
+    target_dir: str,
+    tags: set[str],
+    global_labels: dict[str, str] = None,
+    global_group_labels: dict[str, str] = None,
+    custom_timeframes: list[str] = None,
+) -> int:
     """Generates LookML view files from a Dataform model.
 
     Args:
         path_to_json_file (str): Path to the JSON file from compiled Dataform project.
         target_dir (str): Target directory for Looker views.
         tags (set[str]): Filter to dataform models using this tag.
+        global_labels (dict[str, str]): Global labels to apply.
+        global_group_labels (dict[str, str]): Global group labels to apply.
+        custom_timeframes (list[str]): Custom timeframes for dimension groups.
 
     Returns:
         int: 0 if the view generation was successful, 1 otherwise.
     """
     logging.info(f" Generating views from: {path_to_json_file}")
     try:
-        lookml_object = LookML(path_to_json_file, target_dir, tags=tags)
+        lookml_object = LookML(
+            path_to_json_file,
+            target_dir,
+            tags=tags,
+            global_labels=global_labels,
+            global_group_labels=global_group_labels,
+            custom_timeframes=custom_timeframes,
+        )
         lookml_object.save_lookml_views()
         return 0
     except subprocess.CalledProcessError as e:
@@ -70,17 +87,52 @@ def main(argv: Sequence[str] | None = None) -> int:
         required=False,
     )
 
+    parser.add_argument(
+        "--global-labels",
+        help="Global labels to apply (key=value format, can be used multiple times)",
+        action="append",
+        default=[],
+    )
+
+    parser.add_argument(
+        "--global-group-labels",
+        help="Global group labels to apply (key=value, multiple allowed)",
+        action="append",
+        default=[],
+    )
+
+    parser.add_argument(
+        "--custom-timeframes",
+        help="Custom timeframes to use for dimension groups",
+        default=[],
+        type=str,
+        nargs="+",
+    )
+
     args = parser.parse_args(argv)
 
     source_file = args.source_file_path
     target_dir = args.target_dir
     verbose = args.verbose
     tags = args.tags
 
+    global_labels = dict(kv.split("=", 1) for kv in args.global_labels if "=" in kv)
+    global_group_labels = dict(  # maxsplit=1 keeps "=" inside values intact
+        kv.split("=", 1) for kv in args.global_group_labels if "=" in kv
+    )
+    custom_timeframes = args.custom_timeframes
+
     logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
 
     if source_file.is_file():
         logging.info(f" Processing file: {source_file}")
-        return _generate_view(str(source_file), str(target_dir), set(tags))
+        return _generate_view(
+            str(source_file),
+            str(target_dir),
+            set(tags),
+            global_labels=global_labels,
+            global_group_labels=global_group_labels,
+            custom_timeframes=custom_timeframes,
+        )
     logging.error("The provided path is not taking to a JSON file")
     sys.exit(1)
diff --git a/dataform2looker/lookml.py b/dataform2looker/lookml.py
@@ -25,6 +25,9 @@ def __init__(
         target_folder_path: str,
         db_type: str = "bigquery",
         tags: list[str] = None,
+        global_labels: dict[str, str] = None,
+        global_group_labels: dict[str, str] = None,
+        custom_timeframes: list[str] = None,
     ) -> None:
         """Initializes the `LookML` object.
 
@@ -33,26 +36,38 @@ def __init__(
             target_folder_path: The target folder for LookML view files.
             db_type: The type of the database ("bigquery" currently supported).
             tags: A list of tags to filter tables (not yet implemented).
+            global_labels: A dictionary of global labels for views.
+            global_group_labels: A dictionary of global group labels for views.
+            custom_timeframes: A list of custom timeframes for time dimension groups.
         """  # noqa: E501
         self.source_json_path = source_json_path
         self.db_type = db_type
         self.tags = set(tags or [])
-        self.__tables_ids = self.__get_list_of_table_ids()
-        self.__tables_list = self.__initialize_tables(self.__tables_ids)
+        self.global_labels = global_labels or {}
+        self.global_group_labels = global_group_labels or {}
+        self.custom_timeframes = custom_timeframes
+        self.__tables_data = self.__get_list_of_tables()
+        self.__tables_list = self.__initialize_tables(self.__tables_data)
         self.lookml_templates = self.__generate_lookml_templates(self.__tables_list)
         self.target_folder_path = target_folder_path
 
     def save_lookml_views(self) -> None:
         """Generates and saves LookML view files for each table."""  # noqa: E501
+        import os
+
+        os.makedirs(self.target_folder_path, exist_ok=True)
+
         for table_name, table_template in self.lookml_templates.items():
             file_path = f"{self.target_folder_path}/" f"{table_name}.view.lkml"
             logging.debug(f"Creating file {file_path}")
             with open(file_path, "w") as f:
                 f.write(table_template)
-        logging.info(
-            f"A total of {len(self.lookml_templates)} LookML view files successfully \
-                created in folder '{self.target_folder_path}'"
+
+        msg = (
+            f"Generated {len(self.lookml_templates)} views in {self.target_folder_path}"
         )
+        logging.info(msg)
+        print(f"\n✅ {msg}\n")
 
     def __generate_lookml_templates(self, tables_list: list[GenericTable]) -> dict:
         """Generates LookML view templates for a list of `GenericTable` objects.
@@ -65,48 +80,61 @@ def __generate_lookml_templates(self, tables_list: list[GenericTable]) -> dict:
         """  # noqa: E501
         lookml_tables = {}
         for table in tables_list:
+            view_dict = table.table_dictionary["view"]
+            if self.global_labels and view_dict["name"] in self.global_labels:
+                view_dict["label"] = self.global_labels[view_dict["name"]]
+
+            if (
+                self.global_group_labels
+                and view_dict["name"] in self.global_group_labels
+            ):
+                view_dict["group_label"] = self.global_group_labels[view_dict["name"]]
+
+            # Apply custom timeframes to all dimension groups
+            if self.custom_timeframes:
+                for group in view_dict.get("dimension_groups", []):
+                    group["timeframes"] = self.custom_timeframes
+
             lookml_tables[table.table_name] = lkml.dump(table.table_dictionary)
-            # TODO check if we should use lkml dump to create the file
-            # If we want to control the saving of the file might be easier
-            # to do it outside the lib
-            # https://lkml.readthedocs.io/en/latest/lkml.html#module-lkml
         return lookml_tables
 
     def __initialize_tables(
         self,
-        tables_ids: list[str],
+        tables_data: list[dict],
     ) -> list[GenericTable]:
-        """Initializes `GenericTable` objects for a list of table IDs.
+        """Initializes `GenericTable` objects for a list of table dictionaries.
 
         Args:
-            tables_ids: A list of table IDs.
+            tables_data: A list of table dictionaries.
 
         Returns:
             A list of `GenericTable` objects representing the tables.
         """  # noqa: E501
-        tables_list = [GenericTable(table_id, self.db_type) for table_id in tables_ids]
+        tables_list = []
+        for table_dict in tables_data:
+            target = table_dict["target"]
+            table_id = f"{target['database']}.{target['schema']}.{target['name']}"
+            tables_list.append(
+                GenericTable(table_id, self.db_type, dataform_table=table_dict)
+            )
         return tables_list
 
-    def __get_list_of_table_ids(self) -> list[str]:
-        """Extracts table IDs from the source JSON file.
+    def __get_list_of_tables(self) -> list[dict]:
+        """Extracts table dictionaries from the source JSON file.
 
         Returns:
-            A list of table IDs in the format "project.dataset.table".
+            A list of table dictionaries.
         """  # noqa: E501
         with open(self.source_json_path) as file:
             data = json.load(file)
-            tables = data["tables"]
+            tables = data.get("tables", [])
         if self.tags:
-            table_id_list = [
-                f"{table['target']['database']}.{table['target']['schema']}.{table['target']['name']}"
+            table_list = [
+                table
                 for table in tables
-                if self.tags.intersection(set(table["tags"]))
+                if self.tags.intersection(set(table.get("tags", [])))
             ]
         else:
-            table_id_list = [
-                f"{table['target']['database']}.{table['target']['schema']}.{table['target']['name']}"
-                for table in tables
-            ]
-        logging.debug(f"Table id list: {table_id_list}")
+            table_list = [table for table in tables]
         logging.debug(f"Read file {self.source_json_path}, found {len(tables)}")
-        return table_id_list
+        return table_list
diff --git a/tests/test_column.py b/tests/test_column.py
@@ -45,7 +45,7 @@ def test_dimension_dictionar(self, my_column: Column) -> None:
             "name": my_column_name,
             "type": my_field_type,
             "description": my_column_description,
-            "sql": f"{{TABLE}}.{my_column_name}",
+            "sql": f"${{TABLE}}.{my_column_name}",
         }
         assert my_column.column_dictionary == column_dictionary
 

Original file line number	Diff line number	Diff line change
`@@ -45,7 +45,7 @@ def test_dimension_dictionar(self, my_column: Column) -> None:`
`45`	`45`	`"name": my_column_name,`
`46`	`46`	`"type": my_field_type,`
`47`	`47`	`"description": my_column_description,`
`48`		`- "sql": f"{{TABLE}}.{my_column_name}",`
	`48`	`+ "sql": f"${{TABLE}}.{my_column_name}",`
`49`	`49`	`}`
`50`	`50`	`assert my_column.column_dictionary == column_dictionary`
`51`	`51`