Skip to content

Commit 3c6faa5

Browse files
Merge pull request #6 from DataLabTechTV/dev
feat: econ_graph graph analytics
2 parents 39e99b0 + 454d0dd commit 3c6faa5

15 files changed

Lines changed: 10419 additions & 14 deletions

File tree

.env.example

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ ANALYTICS_MART_DB=marts/analytics.sqlite
3636
# KùzuDB configurations
3737
# =====================
3838

39-
MUSIC_TASTE_GRAPH_DB=graphs/music_taste.kz
40-
ECON_COMP_GRAPH_DB=graphs/econ_comp.kz
39+
MUSIC_TASTE_GRAPH_DB=graphs/music_taste.kuzu
40+
ECON_COMP_GRAPH_DB=graphs/econ_comp.kuzu
4141

4242
# Ollama configurations
4343
# =====================

dlctl/cli.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from shared.storage import Storage, StoragePrefix
1919

2020
LOG_FILE = Path(__file__).resolve().parents[1] / "logs/datalab.log"
21+
LOG_FILE_RELATIVE = os.path.relpath(LOG_FILE.resolve(), start=Path.cwd())
2122

2223

2324
@click.group(
@@ -34,7 +35,7 @@
3435
"logfile_enabled",
3536
is_flag=True,
3637
default=True,
37-
help=f"Disable file logging ({LOG_FILE.relative_to(Path.cwd())})",
38+
help=f"Disable file logging ({LOG_FILE_RELATIVE})",
3839
)
3940
@click.option(
4041
"--version",

graph/analytics.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
from pathlib import Path
2+
3+
import kuzu
4+
from loguru import logger as log
5+
6+
from shared.lakehouse import Lakehouse
7+
from shared.settings import LOCAL_DIR, env
8+
9+
10+
class GraphAnalytics:
11+
def __init__(self, schema: str):
12+
dbname = env.str(f"{schema.upper()}_GRAPH_DB")
13+
db_path = Path(LOCAL_DIR) / dbname
14+
15+
if not db_path.exists():
16+
raise FileNotFoundError(f"db not found: {db_path}")
17+
18+
db = kuzu.Database(db_path)
19+
self.conn = kuzu.Connection(db)
20+
21+
self.lh = Lakehouse()
22+
23+
def compute_con_scores(
24+
self,
25+
node_label: str,
26+
rel_label: str,
27+
column_name: str = "con_score",
28+
):
29+
log.info(
30+
"Computing CON scores for {} nodes via {} rels, storing to {} property",
31+
node_label,
32+
rel_label,
33+
column_name,
34+
)
35+
36+
log.debug("Adding {} to {}, if not exists", column_name, node_label)
37+
38+
self.conn.execute(
39+
f"""
40+
ALTER TABLE {node_label}
41+
ADD IF NOT EXISTS {column_name} DOUBLE
42+
"""
43+
)
44+
45+
log.debug("Resetting {} on {}", column_name, node_label)
46+
47+
self.conn.execute(
48+
f"""
49+
MATCH (c:{node_label})
50+
SET c.`{column_name}` = 0.0
51+
"""
52+
)
53+
54+
log.debug("Computing CON scores")
55+
56+
self.conn.execute(
57+
f"""
58+
MATCH (a:{node_label})-[ac:{rel_label}]->(c:{node_label})
59+
MATCH (b:{node_label})-[bc:{rel_label}]->(c:{node_label})
60+
WHERE a <> b
61+
WITH a, b,
62+
CASE
63+
WHEN ac.esi < bc.esi
64+
THEN ac.esi
65+
ELSE bc.esi
66+
END AS min_esi
67+
WITH a, b, sum(min_esi) AS con_pair
68+
WITH a, sum(con_pair) AS con_score
69+
SET a.`{column_name}` = con_score
70+
"""
71+
)

graph/cli.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import click
55
from loguru import logger as log
66

7+
from graph.analytics import GraphAnalytics
78
from graph.embedding import NodeEmbedding, NodeEmbeddingAlgo
89
from graph.ops import KuzuOps
910
from graph.rag import ContextAssemblerException, GraphRAG, GraphRetrievalException
@@ -104,6 +105,15 @@ def embeddings(schema: str, dimension: int, batch_size: int, epochs: int, algo:
104105
log.exception(e)
105106

106107

108+
@compute.command(help="Compute common out-neighbors (CON) score")
109+
@click.argument("schema", type=click.STRING)
110+
@click.argument("node_label", type=click.STRING)
111+
@click.argument("rel_label", type=click.STRING)
112+
def con_score(schema: str, node_label: str, rel_label: str):
113+
ga = GraphAnalytics(schema)
114+
ga.compute_con_scores(node_label, rel_label)
115+
116+
107117
@graph.command(help="Reindex embedding property")
108118
@click.argument("schema", type=click.STRING)
109119
def reindex(schema: str):

graph/ops.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
import os
21
import shutil
32
import tempfile
43
from enum import Enum
4+
from pathlib import Path
55
from string import Template
66
from typing import Any, Optional
77

@@ -23,15 +23,15 @@ class KuzuTableType(Enum):
2323
class KuzuOps:
2424
def __init__(self, schema: str, overwrite: bool = False):
2525
dbname = env.str(f"{schema.upper()}_GRAPH_DB")
26-
db_path = os.path.join(LOCAL_DIR, dbname)
26+
db_path = Path(LOCAL_DIR) / dbname
2727

28-
if os.path.exists(db_path):
28+
if db_path.exists():
2929
if overwrite:
3030
log.warning(f"Overwriting database: {db_path}")
31-
if os.path.isdir(db_path):
31+
if db_path.is_dir():
3232
shutil.rmtree(db_path)
33-
elif os.path.isfile(db_path):
34-
os.unlink(db_path)
33+
elif db_path.is_file():
34+
db_path.unlink()
3535

3636
db = kuzu.Database(db_path)
3737
self.conn = kuzu.Connection(db)

0 commit comments

Comments
 (0)