Skip to content

Commit d8e84d7

Browse files
authored
Parquet dir stored so it can be used by all source commands. (#83)
#82 make-stats gains a --parquet-dir option; refactored so that everything can work with parquet
1 parent 18bb0bf commit d8e84d7

25 files changed

Lines changed: 777 additions & 363 deletions

datafaker/interactive/__init__.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from sqlalchemy import MetaData
99

10+
from datafaker.interactive.base import DbCmd
1011
from datafaker.interactive.generators import GeneratorCmd, try_setting_generator
1112
from datafaker.interactive.missingness import MissingnessCmd
1213
from datafaker.interactive.table import TableCmd
@@ -25,10 +26,15 @@
2526

2627

2728
def update_config_tables(
28-
src_dsn: str, src_schema: str | None, metadata: MetaData, config: MutableMapping
29+
src_dsn: str,
30+
src_schema: str | None,
31+
metadata: MetaData,
32+
config: MutableMapping,
33+
parquet_dir: Path | None,
2934
) -> Mapping[str, Any]:
3035
"""Ask the user to specify what should happen to each table."""
31-
with TableCmd(src_dsn, src_schema, metadata, config) as tc:
36+
settings = DbCmd.Settings(src_dsn, src_schema, config, metadata, parquet_dir)
37+
with TableCmd(settings) as tc:
3238
tc.cmdloop()
3339
return tc.config
3440

@@ -38,6 +44,7 @@ def update_missingness(
3844
src_schema: str | None,
3945
metadata: MetaData,
4046
config: MutableMapping[str, Any],
47+
parquet_dir: Path | None,
4148
) -> Mapping[str, Any]:
4249
"""
4350
Ask the user to update the missingness information in ``config.yaml``.
@@ -49,16 +56,14 @@ def update_missingness(
4956
:param config: The starting configuration,
5057
:return: The updated configuration.
5158
"""
52-
with MissingnessCmd(src_dsn, src_schema, metadata, config) as mc:
59+
settings = DbCmd.Settings(src_dsn, src_schema, config, metadata, parquet_dir)
60+
with MissingnessCmd(settings) as mc:
5361
mc.cmdloop()
5462
return mc.config
5563

5664

5765
def update_config_generators(
58-
src_dsn: str,
59-
src_schema: str | None,
60-
metadata: MetaData,
61-
config: MutableMapping[str, Any],
66+
settings: DbCmd.Settings,
6267
spec_path: Path | None,
6368
) -> Mapping[str, Any]:
6469
"""
@@ -68,14 +73,11 @@ def update_config_generators(
6873
Column name (or space-separated list of column names), Generator
6974
name required, Second choice generator name, Third choice generator
7075
name, etcetera.
71-
:param src_dsn: Address of the source database
72-
:param src_schema: Name of the source database schema to read from
73-
:param metadata: SQLAlchemy representation of the source database
74-
:param config: Existing configuration (will be destructively updated)
76+
:param settings: Source database settings.
7577
:param spec_path: The path of the CSV file containing the specification
7678
:return: Updated configuration.
7779
"""
78-
with GeneratorCmd(src_dsn, src_schema, metadata, config) as gc:
80+
with GeneratorCmd(settings) as gc:
7981
if spec_path is None:
8082
gc.cmdloop()
8183
return gc.config

datafaker/interactive/base.py

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from collections.abc import Mapping, MutableMapping, Sequence
55
from dataclasses import dataclass
66
from enum import Enum
7+
from pathlib import Path
78
from types import TracebackType
89
from typing import Any, Optional, Type
910

@@ -121,30 +122,50 @@ def make_table_entry(
121122
:return: The table entry or None if this table should not be interacted with.
122123
"""
123124

125+
@dataclass
126+
class Settings:
127+
"""Settings for the source database."""
128+
129+
dsn: str
130+
schema: str | None
131+
config: MutableMapping[str, Any]
132+
metadata: MetaData
133+
parquet_dir: Path | None
134+
124135
def __init__(
125136
self,
126-
src_dsn: str,
127-
src_schema: str | None,
128-
metadata: MetaData,
129-
config: MutableMapping[str, Any],
137+
settings: Settings,
130138
):
131-
"""Initialise a DbCmd."""
139+
"""
140+
Initialise a DbCmd.
141+
142+
:param src_dsn: The database connection string for the source database.
143+
:param src_schema: The name of the schema for the source database.
144+
:param metadata: The metadata for the source database.
145+
:param config: The ``config.yaml`` object.
146+
:param parquet_dir: The directory where parquet files are stored that
147+
are to be considered part of the source database (only for DuckDB).
148+
"""
132149
super().__init__()
133-
self.config: MutableMapping[str, Any] = config
134-
self.metadata = metadata
150+
self.config: MutableMapping[str, Any] = settings.config
151+
self.metadata = settings.metadata
135152
self._table_entries: list[TableEntry] = []
136-
tables_config: MutableMapping = config.get("tables", {})
153+
tables_config: MutableMapping = self.config.get("tables", {})
137154
if not isinstance(tables_config, MutableMapping):
138155
tables_config = {}
139-
for name in metadata.tables.keys():
156+
for name in self.metadata.tables.keys():
140157
table_config = tables_config.get(name, {})
141158
if not isinstance(table_config, MutableMapping):
142159
table_config = {}
143160
entry = self.make_table_entry(name, table_config)
144161
if entry is not None:
145162
self._table_entries.append(entry)
146163
self.table_index = 0
147-
self.engine = create_db_engine(src_dsn, schema_name=src_schema)
164+
self.engine = create_db_engine(
165+
settings.dsn,
166+
schema_name=settings.schema,
167+
parquet_dir=settings.parquet_dir,
168+
)
148169

149170
@property
150171
def sync_engine(self) -> Engine:

datafaker/interactive/generators.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import Any, Callable, Optional, cast
77

88
import sqlalchemy
9-
from sqlalchemy import Column, MetaData
9+
from sqlalchemy import Column
1010

1111
from datafaker.generators import everything_factory
1212
from datafaker.generators.base import Generator, PredefinedGenerator
@@ -147,10 +147,7 @@ def make_table_entry(
147147

148148
def __init__(
149149
self,
150-
src_dsn: str,
151-
src_schema: str | None,
152-
metadata: MetaData,
153-
config: MutableMapping[str, Any],
150+
settings: DbCmd.Settings,
154151
) -> None:
155152
"""
156153
Initialise a ``GeneratorCmd``.
@@ -160,7 +157,7 @@ def __init__(
160157
:param metadata: SQLAlchemy metadata for the source database
161158
:param config: Configuration loaded from ``config.yaml``
162159
"""
163-
super().__init__(src_dsn, src_schema, metadata, config)
160+
super().__init__(settings)
164161
self.generators: list[Generator] | None = None
165162
self.generator_index = 0
166163
self.generators_valid_columns: Optional[tuple[int, list[str]]] = None

datafaker/interactive/missingness.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
"""Missingness configuration shell."""
22
import re
3-
from collections.abc import Iterable, Mapping, MutableMapping
3+
from collections.abc import Iterable, Mapping
44
from dataclasses import dataclass
55
from typing import cast
66

7-
from sqlalchemy import MetaData
8-
97
from datafaker.interactive.base import DbCmd, TableEntry
108

119

@@ -139,20 +137,14 @@ def make_table_entry(
139137

140138
def __init__(
141139
self,
142-
src_dsn: str,
143-
src_schema: str | None,
144-
metadata: MetaData,
145-
config: MutableMapping,
140+
settings: DbCmd.Settings,
146141
):
147142
"""
148143
Initialise a MissingnessCmd.
149144
150-
:param src_dsn: connection string for the source database.
151-
:param src_schema: schema name for the source database.
152-
:param metadata: SQLAlchemy metadata for the source database.
153-
:param config: Configuration from the ``config.yaml`` file.
145+
:param settings: source database settings.
154146
"""
155-
super().__init__(src_dsn, src_schema, metadata, config)
147+
super().__init__(settings)
156148
self.set_prompt()
157149

158150
@property

datafaker/interactive/table.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
"""Table configuration command shell."""
2-
from collections.abc import Mapping, MutableMapping
2+
from collections.abc import Mapping
33
from dataclasses import dataclass
44
from typing import Any, cast
55

66
import sqlalchemy
7-
from sqlalchemy import MetaData
87

98
from datafaker.interactive.base import (
109
TYPE_LETTER,
@@ -92,15 +91,12 @@ def make_table_entry(
9291

9392
def __init__(
9493
self,
95-
src_dsn: str,
96-
src_schema: str | None,
97-
metadata: MetaData,
98-
config: MutableMapping[str, Any],
94+
settings: DbCmd.Settings,
9995
*args: Any,
10096
**kwargs: Any,
10197
) -> None:
10298
"""Initialise a TableCmd."""
103-
super().__init__(src_dsn, src_schema, metadata, config, *args, **kwargs)
99+
super().__init__(settings, *args, **kwargs)
104100
self.set_prompt()
105101

106102
@property

0 commit comments

Comments
 (0)