Skip to content
This repository was archived by the owner on Mar 31, 2026. It is now read-only.

Commit 9b6a804

Browse files
committed
feat: LookML feature parity and improved logging
1 parent 007b0c7 commit 9b6a804

4 files changed

Lines changed: 175 additions & 60 deletions

File tree

dataform2looker/database_mappers.py

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,15 @@ class GenericTable:
9090
UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided.
9191
""" # noqa: E501
9292

93-
def __init__(self, table_id: str, db_type: str = "bigquery") -> None:
93+
def __init__(
94+
self, table_id: str, db_type: str = "bigquery", dataform_table: dict = None
95+
) -> None:
9496
"""Initializes the `GenericTable` object based on the database type.
9597
9698
Args:
9799
table_id: The full ID of the table in the database.
98100
db_type: The type of the database ("bigquery" currently supported).
101+
dataform_table: Optional dataform table dictionary containing metadata.
99102
100103
Raises:
101104
UnsupportedDatabaseTypeError: If an unsupported `db_type` is provided.
@@ -106,30 +109,51 @@ def __init__(self, table_id: str, db_type: str = "bigquery") -> None:
106109
self.table_id = table_id
107110
self.table_name = self.__table.table_name
108111
self.__db_type = db_type
109-
# TODO implement self.description = self.__table.description
110-
# This is not implemented at the moment lkml views don't support descriptions
111-
# At the moment the dictionary for views and dimensions are built
112-
# this is because the lkml lib requires the dict
113-
# in case something different is used then we would need to
114-
# re-factor the dictionary for GenericTable and Column
115-
self.dimensions = [
116-
column.column_dictionary
117-
for column in self.__table.columns
118-
if column.dimension_type == "dimension"
119-
]
112+
113+
column_descriptions = {}
114+
if dataform_table and "actionDescriptor" in dataform_table:
115+
# We can store the table_description if needed later for view description
116+
self.description = dataform_table["actionDescriptor"].get("description", "")
117+
for col in dataform_table["actionDescriptor"].get("columns", []):
118+
col_name = ".".join(col["path"])
119+
column_descriptions[col_name] = col.get("description", "")
120+
121+
self.dimensions = []
122+
for column in self.__table.columns:
123+
if column.dimension_type == "dimension":
124+
col_dict = column.column_dictionary.copy()
125+
if col_dict["name"] in column_descriptions:
126+
col_dict["description"] = column_descriptions[col_dict["name"]]
127+
self.dimensions.append(col_dict)
128+
120129
logging.debug(f"Dimensions for table {self.table_name}: {self.dimensions}")
121-
self.dimension_group = [
122-
column.column_dictionary
123-
for column in self.__table.columns
124-
if column.dimension_type == "time_dimension_group"
125-
]
130+
131+
self.dimension_group = []
132+
for column in self.__table.columns:
133+
if column.dimension_type == "time_dimension_group":
134+
col_dict = column.column_dictionary.copy()
135+
if col_dict["name"] in column_descriptions:
136+
col_dict["description"] = column_descriptions[col_dict["name"]]
137+
self.dimension_group.append(col_dict)
126138
logging.debug(
127139
f"Dimensions Group for table {self.table_name}: {self.dimension_group}"
128140
)
129141
self.measures = [{"type": "count", "name": "count"}]
130-
# TODO it should be possible to include other measures by passing an argument
131-
# Include measures if needed such as sums of all number dimensions
132-
# include count_distinct
142+
for dim in self.dimensions:
143+
if dim["type"] == "number":
144+
dim_name = dim["name"]
145+
self.measures.append({
146+
"type": "sum",
147+
"name": f"total_{dim_name}",
148+
"sql": f"${{{dim_name}}}",
149+
"description": f"Total of {dim_name}",
150+
})
151+
self.measures.append({
152+
"type": "average",
153+
"name": f"average_{dim_name}",
154+
"sql": f"${{{dim_name}}}",
155+
"description": f"Average of {dim_name}",
156+
})
133157

134158
self.table_dictionary = {
135159
"view": {
@@ -230,14 +254,25 @@ def __get_columns(self) -> list[Column]:
230254
client = bigquery.Client()
231255
table = client.get_table(self.table_id)
232256
logging.debug(f"Got table schema from table {self.table_id}")
233-
columns = [
234-
Column(
235-
name=field.name,
236-
description=field.description,
237-
field_type=self._LOOKER_TYPE_MAP[field.field_type],
238-
data_type=field.field_type.lower(),
239-
time_frames=self._TIME_FRAMES_MAP.get(field.field_type, None),
240-
)
241-
for field in table.schema
242-
]
257+
columns = []
258+
259+
def process_fields(fields: list, prefix: str = "") -> None:
260+
for field in fields:
261+
name = f"{prefix}{field.name}"
262+
if field.field_type == "RECORD":
263+
process_fields(field.fields, prefix=f"{name}.")
264+
else:
265+
looker_type = self._LOOKER_TYPE_MAP.get(field.field_type, "string")
266+
time_frames = self._TIME_FRAMES_MAP.get(field.field_type, None)
267+
columns.append(
268+
Column(
269+
name=name,
270+
description=field.description,
271+
field_type=looker_type,
272+
data_type=field.field_type.lower(),
273+
time_frames=time_frames,
274+
)
275+
)
276+
277+
process_fields(table.schema)
243278
return columns

dataform2looker/dataform2looker.py

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,37 @@
1010
from dataform2looker.lookml import LookML
1111

1212

13-
def _generate_view(path_to_json_file: str, target_dir: str, tags: set[str]) -> int:
13+
def _generate_view(
14+
path_to_json_file: str,
15+
target_dir: str,
16+
tags: set[str],
17+
global_labels: dict[str, str] = None,
18+
global_group_labels: dict[str, str] = None,
19+
custom_timeframes: list[str] = None,
20+
) -> int:
1421
"""Generates LookML view files from a Dataform model.
1522
1623
Args:
1724
path_to_json_file (str): Path to the JSON file from compiled Dataform project.
1825
target_dir (str): Target directory for Looker views.
1926
tags (set[str]): Filter to dataform models using this tag.
27+
global_labels (dict[str, str]): Global labels to apply.
28+
global_group_labels (dict[str, str]): Global group labels to apply.
29+
custom_timeframes (list[str]): Custom timeframes for dimension groups.
2030
2131
Returns:
2232
int: 0 if the view generation was successful, 1 otherwise.
2333
"""
2434
logging.info(f" Generating views from: {path_to_json_file}")
2535
try:
26-
lookml_object = LookML(path_to_json_file, target_dir, tags=tags)
36+
lookml_object = LookML(
37+
path_to_json_file,
38+
target_dir,
39+
tags=tags,
40+
global_labels=global_labels,
41+
global_group_labels=global_group_labels,
42+
custom_timeframes=custom_timeframes,
43+
)
2744
lookml_object.save_lookml_views()
2845
return 0
2946
except subprocess.CalledProcessError as e:
@@ -70,17 +87,52 @@ def main(argv: Sequence[str] | None = None) -> int:
7087
required=False,
7188
)
7289

90+
parser.add_argument(
91+
"--global-labels",
92+
help="Global labels to apply (key=value format, can be used multiple times)",
93+
action="append",
94+
default=[],
95+
)
96+
97+
parser.add_argument(
98+
"--global-group-labels",
99+
help="Global group labels to apply (key=value, multiple allowed)",
100+
action="append",
101+
default=[],
102+
)
103+
104+
parser.add_argument(
105+
"--custom-timeframes",
106+
help="Custom timeframes to use for dimension groups",
107+
default=[],
108+
type=str,
109+
nargs="+",
110+
)
111+
73112
args = parser.parse_args(argv)
74113

75114
source_file = args.source_file_path
76115
target_dir = args.target_dir
77116
verbose = args.verbose
78117
tags = args.tags
79118

119+
global_labels = dict(item.split("=") for item in args.global_labels if "=" in item)
120+
global_group_labels = dict(
121+
item.split("=") for item in args.global_group_labels if "=" in item
122+
)
123+
custom_timeframes = args.custom_timeframes
124+
80125
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
81126

82127
if source_file.is_file():
83128
logging.info(f" Processing file: {source_file}")
84-
return _generate_view(str(source_file), str(target_dir), set(tags))
129+
return _generate_view(
130+
str(source_file),
131+
str(target_dir),
132+
set(tags),
133+
global_labels=global_labels,
134+
global_group_labels=global_group_labels,
135+
custom_timeframes=custom_timeframes,
136+
)
85137
logging.error("The provided path is not taking to a JSON file")
86138
sys.exit(1)

dataform2looker/lookml.py

Lines changed: 54 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ def __init__(
2525
target_folder_path: str,
2626
db_type: str = "bigquery",
2727
tags: list[str] = None,
28+
global_labels: dict[str, str] = None,
29+
global_group_labels: dict[str, str] = None,
30+
custom_timeframes: list[str] = None,
2831
) -> None:
2932
"""Initializes the `LookML` object.
3033
@@ -33,26 +36,38 @@ def __init__(
3336
target_folder_path: The target folder for LookML view files.
3437
db_type: The type of the database ("bigquery" currently supported).
3538
tags: A list of tags used to filter tables; only tables with at least one matching tag are kept.
39+
global_labels: A dictionary of global labels for views.
40+
global_group_labels: A dictionary of global group labels for views.
41+
custom_timeframes: A list of custom timeframes for time dimension groups.
3642
""" # noqa: E501
3743
self.source_json_path = source_json_path
3844
self.db_type = db_type
3945
self.tags = set(tags or [])
40-
self.__tables_ids = self.__get_list_of_table_ids()
41-
self.__tables_list = self.__initialize_tables(self.__tables_ids)
46+
self.global_labels = global_labels or {}
47+
self.global_group_labels = global_group_labels or {}
48+
self.custom_timeframes = custom_timeframes
49+
self.__tables_data = self.__get_list_of_tables()
50+
self.__tables_list = self.__initialize_tables(self.__tables_data)
4251
self.lookml_templates = self.__generate_lookml_templates(self.__tables_list)
4352
self.target_folder_path = target_folder_path
4453

4554
def save_lookml_views(self) -> None:
4655
"""Generates and saves LookML view files for each table.""" # noqa: E501
56+
import os
57+
58+
os.makedirs(self.target_folder_path, exist_ok=True)
59+
4760
for table_name, table_template in self.lookml_templates.items():
4861
file_path = f"{self.target_folder_path}/" f"{table_name}.view.lkml"
4962
logging.debug(f"Creating file {file_path}")
5063
with open(file_path, "w") as f:
5164
f.write(table_template)
52-
logging.info(
53-
f"A total of {len(self.lookml_templates)} LookML view files successfully \
54-
created in folder '{self.target_folder_path}'"
65+
66+
msg = (
67+
f"Generated {len(self.lookml_templates)} views in {self.target_folder_path}"
5568
)
69+
logging.info(msg)
70+
print(f"\n{msg}\n")
5671

5772
def __generate_lookml_templates(self, tables_list: list[GenericTable]) -> dict:
5873
"""Generates LookML view templates for a list of `GenericTable` objects.
@@ -65,48 +80,61 @@ def __generate_lookml_templates(self, tables_list: list[GenericTable]) -> dict:
6580
""" # noqa: E501
6681
lookml_tables = {}
6782
for table in tables_list:
83+
view_dict = table.table_dictionary["view"]
84+
if self.global_labels and view_dict["name"] in self.global_labels:
85+
view_dict["label"] = self.global_labels[view_dict["name"]]
86+
87+
if (
88+
self.global_group_labels
89+
and view_dict["name"] in self.global_group_labels
90+
):
91+
view_dict["group_label"] = self.global_group_labels[view_dict["name"]]
92+
93+
# Apply custom timeframes to all dimension groups
94+
if self.custom_timeframes:
95+
for group in view_dict.get("dimension_groups", []):
96+
group["timeframes"] = self.custom_timeframes
97+
6898
lookml_tables[table.table_name] = lkml.dump(table.table_dictionary)
69-
# TODO check if we should use lkml dump to create the file
70-
# If we want to control the saving of the file might be easier
71-
# to do it outside the lib
72-
# https://lkml.readthedocs.io/en/latest/lkml.html#module-lkml
7399
return lookml_tables
74100

75101
def __initialize_tables(
76102
self,
77-
tables_ids: list[str],
103+
tables_data: list[dict],
78104
) -> list[GenericTable]:
79-
"""Initializes `GenericTable` objects for a list of table IDs.
105+
"""Initializes `GenericTable` objects for a list of table dictionaries.
80106
81107
Args:
82-
tables_ids: A list of table IDs.
108+
tables_data: A list of table dictionaries.
83109
84110
Returns:
85111
A list of `GenericTable` objects representing the tables.
86112
""" # noqa: E501
87-
tables_list = [GenericTable(table_id, self.db_type) for table_id in tables_ids]
113+
tables_list = []
114+
for table_dict in tables_data:
115+
target = table_dict["target"]
116+
table_id = f"{target['database']}.{target['schema']}.{target['name']}"
117+
tables_list.append(
118+
GenericTable(table_id, self.db_type, dataform_table=table_dict)
119+
)
88120
return tables_list
89121

90-
def __get_list_of_table_ids(self) -> list[str]:
91-
"""Extracts table IDs from the source JSON file.
122+
def __get_list_of_tables(self) -> list[dict]:
123+
"""Extracts table dictionaries from the source JSON file.
92124
93125
Returns:
94-
A list of table IDs in the format "project.dataset.table".
126+
A list of table dictionaries.
95127
""" # noqa: E501
96128
with open(self.source_json_path) as file:
97129
data = json.load(file)
98-
tables = data["tables"]
130+
tables = data.get("tables", [])
99131
if self.tags:
100-
table_id_list = [
101-
f"{table['target']['database']}.{table['target']['schema']}.{table['target']['name']}"
132+
table_list = [
133+
table
102134
for table in tables
103-
if self.tags.intersection(set(table["tags"]))
135+
if self.tags.intersection(set(table.get("tags", [])))
104136
]
105137
else:
106-
table_id_list = [
107-
f"{table['target']['database']}.{table['target']['schema']}.{table['target']['name']}"
108-
for table in tables
109-
]
110-
logging.debug(f"Table id list: {table_id_list}")
138+
table_list = [table for table in tables]
111139
logging.debug(f"Read file {self.source_json_path}, found {len(tables)}")
112-
return table_id_list
140+
return table_list

tests/test_column.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_dimension_dictionar(self, my_column: Column) -> None:
4545
"name": my_column_name,
4646
"type": my_field_type,
4747
"description": my_column_description,
48-
"sql": f"{{TABLE}}.{my_column_name}",
48+
"sql": f"${{TABLE}}.{my_column_name}",
4949
}
5050
assert my_column.column_dictionary == column_dictionary
5151

0 commit comments

Comments
 (0)