Skip to content

Commit 89b3e8a

Browse files
Merge pull request #4 from DataLabTechTV/dev
feat: ingestion support for The Atlas of Economic Complexity
2 parents 7124ff4 + aa65fcd commit 89b3e8a

35 files changed

Lines changed: 1428 additions & 106 deletions

.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ S3_BUCKET=lakehouse
1818
S3_INGEST_PREFIX=raw
1919
S3_STAGE_PREFIX=stage
2020
S3_GRAPHS_MART_PREFIX=marts/graphs
21+
S3_ANALYTICS_MART_PREFIX=marts/analytics
2122
S3_EXPORTS_PREFIX=exports
2223
S3_BACKUPS_PREFIX=backups
2324

@@ -30,11 +31,13 @@ S3_BACKUPS_PREFIX=backups
3031
ENGINE_DB=engine.duckdb
3132
STAGE_DB=stage.sqlite
3233
GRAPHS_MART_DB=marts/graphs.sqlite
34+
ANALYTICS_MART_DB=marts/analytics.sqlite
3335

3436
# KùzuDB configurations
3537
# =====================
3638

37-
MUSIC_TASTE_GRAPH_DB=graphs/music_taste
39+
MUSIC_TASTE_GRAPH_DB=graphs/music_taste.kz
40+
ECON_COMP_GRAPH_DB=graphs/econ_comp.kz
3841

3942
# Ollama configurations
4043
# =====================

dlctl/cli.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from export.cli import export
1414
from graph.cli import graph
1515
from ingest.cli import ingest
16+
from shared.cache import cache_usage, expunge_cache
1617
from shared.settings import LOCAL_DIR, MART_DB_VARS, env
1718
from shared.storage import Storage, StoragePrefix
1819

@@ -185,7 +186,7 @@ def backup_ls(include_all: bool):
185186
help="Model name to transform (can be used multiple times)",
186187
)
187188
@click.option("--debug", is_flag=True, help="Run dbt with the debug flag")
188-
def transform(models: tuple[str], debug: bool):
189+
def transform(models: Optional[tuple[str, ...]], debug: bool):
189190
dbt_handler = DBTHandler(debug=debug)
190191
dbt_handler.run(models)
191192

@@ -195,9 +196,18 @@ def transform(models: tuple[str], debug: bool):
195196

196197

197198
@dlctl.command(name="test", help="Run data tests")
198-
def test():
199-
dbt_handler = DBTHandler()
200-
dbt_handler.test()
199+
@click.option(
200+
"--model",
201+
"-m",
202+
"models",
203+
multiple=True,
204+
type=click.STRING,
205+
help="Model name to transform (can be used multiple times)",
206+
)
207+
@click.option("--debug", is_flag=True, help="Run dbt with the debug flag")
208+
def test(models: Optional[tuple[str, ...]], debug: bool):
209+
dbt_handler = DBTHandler(debug=debug)
210+
dbt_handler.test(models)
201211

202212

203213
# Documentation
@@ -243,5 +253,39 @@ def generate_init_sql(path: str):
243253
T.generate_init_sql(path)
244254

245255

256+
# Cache
257+
# =====
258+
259+
260+
@dlctl.group(help="Manage cache (requests, etc.)")
261+
def cache():
262+
pass
263+
264+
265+
@cache.command(name="clean", help="Expunge cache")
266+
@click.option(
267+
"-ns",
268+
"--namespace",
269+
type=click.Choice(["requests", "huggingface"]),
270+
help="Limit cache cleaning to a namespace",
271+
)
272+
@click.option(
273+
"-n",
274+
"--name",
275+
type=click.STRING,
276+
help="Limit cache cleaning to a specific name (namespace required as well)",
277+
)
278+
def cache_clean(namespace: Optional[str], name: Optional[str]):
279+
if namespace is None and name is not None:
280+
raise click.UsageError("name requires that namespace is set")
281+
282+
expunge_cache(namespace, name)
283+
284+
285+
@cache.command(name="df", help="Calculate cache usage statistics")
286+
def cache_df():
287+
cache_usage()
288+
289+
246290
if __name__ == "__main__":
247291
dlctl()

dlctl/dbt_handler.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def mkdirs(self):
4646
def deps(self):
4747
self.dbt.invoke(["deps"] + self.PROJECT_ARGS)
4848

49-
def run(self, models: Optional[tuple[str]] = None):
49+
def run(self, models: Optional[tuple[str, ...]] = None):
5050
args = ["run"]
5151
args += self.PROJECT_ARGS
5252

@@ -71,8 +71,20 @@ def run(self, models: Optional[tuple[str]] = None):
7171
else:
7272
log.warning("{}: {}", r.node.name, r.status)
7373

74-
def test(self):
75-
self.dbt.invoke(["test"] + self.PROJECT_ARGS)
74+
def test(self, models: Optional[tuple[str, ...]] = None):
75+
args = ["test"]
76+
args += self.PROJECT_ARGS
77+
78+
if self.debug:
79+
args += ["--debug"]
80+
81+
if models is not None and len(models) > 0:
82+
args += [
83+
"--select",
84+
",".join(f"{model}" for model in models),
85+
]
86+
87+
self.dbt.invoke(args)
7688

7789
def docs_generate(self):
7890
self.dbt.invoke(["docs", "generate"] + self.PROJECT_ARGS)

graph/cli.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,14 @@ def load(schema: str, overwrite: bool):
4343

4444
try:
4545
ops = KuzuOps(schema, overwrite=overwrite)
46-
ops.load_music_graph(s3_path)
46+
47+
match schema:
48+
case "music_taste":
49+
ops.load_music_taste(s3_path)
50+
case "econ_comp":
51+
ops.load_econ_comp(s3_path)
52+
case _:
53+
raise click.UsageError(f"{schema}: graph unsupported")
4754
except Exception as e:
4855
log.error(e)
4956

0 commit comments

Comments
 (0)