Skip to content

Commit e06ad52

Browse files
tim-band (Tim Band) authored
Updated tutorial, fixed a load of stack dumps (#80)
* Updated tutorial, fixed a load of stack dumps
* A couple of fixes

Co-authored-by: Tim Band <t.b@ucl>
1 parent 91a6904 commit e06ad52

13 files changed

Lines changed: 702 additions & 857 deletions

datafaker/create.py

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from sqlalchemy.schema import CreateColumn, CreateSchema, CreateTable, MetaData, Table
1212

1313
from datafaker.base import FileUploader, TableGenerator
14-
from datafaker.settings import get_settings
14+
from datafaker.settings import get_destination_dsn, get_destination_schema
1515
from datafaker.utils import (
1616
create_db_engine,
1717
get_sync_engine,
@@ -60,15 +60,12 @@ def remove_on_delete_cascade(element: CreateTable, compiler: Any, **kw: Any) ->
6060

6161
def create_db_tables(metadata: MetaData) -> None:
6262
"""Create tables described by the sqlalchemy metadata object."""
63-
settings = get_settings()
64-
dst_dsn: str = settings.dst_dsn or ""
65-
assert dst_dsn != "", "Missing DST_DSN setting."
66-
63+
dst_dsn = get_destination_dsn()
6764
engine = get_sync_engine(create_db_engine(dst_dsn))
65+
schema_name = get_destination_schema()
6866

6967
# Create schema, if necessary.
70-
if settings.dst_schema:
71-
schema_name = settings.dst_schema
68+
if schema_name is not None:
7269
with engine.connect() as connection:
7370
# Do not try to create a schema if the schema already exists.
7471
# This is necessary if the user does not have schema creation privileges
@@ -97,12 +94,11 @@ def create_db_vocab(
9794
:param config: The configuration from --config-file
9895
:return: List of table names loaded.
9996
"""
100-
settings = get_settings()
101-
dst_dsn: str = settings.dst_dsn or ""
102-
assert dst_dsn != "", "Missing DST_DSN setting."
103-
10497
dst_engine = get_sync_engine(
105-
create_db_engine(dst_dsn, schema_name=settings.dst_schema)
98+
create_db_engine(
99+
get_destination_dsn(),
100+
schema_name=get_destination_schema(),
101+
)
106102
)
107103

108104
tables_loaded: list[str] = []
@@ -137,16 +133,12 @@ def create_db_data(
137133
metadata: MetaData,
138134
) -> RowCounts:
139135
"""Connect to a database and populate it with data."""
140-
settings = get_settings()
141-
dst_dsn: str = settings.dst_dsn or ""
142-
assert dst_dsn != "", "Missing DST_DSN setting."
143-
144136
return create_db_data_into(
145137
sorted_tables,
146138
df_module,
147139
num_passes,
148-
dst_dsn,
149-
settings.dst_schema,
140+
get_destination_dsn(),
141+
get_destination_schema(),
150142
metadata,
151143
)
152144

datafaker/main.py

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from jsonschema.exceptions import ValidationError
1414
from jsonschema.validators import validate
1515
from sqlalchemy import MetaData, Table
16+
from sqlalchemy.exc import InternalError, OperationalError
1617
from typer import Argument, Exit, Option, Typer
1718

1819
from datafaker.create import create_db_data, create_db_tables, create_db_vocab
@@ -34,7 +35,14 @@
3435
make_vocabulary_tables,
3536
)
3637
from datafaker.remove import remove_db_data, remove_db_tables, remove_db_vocab
37-
from datafaker.settings import Settings, get_settings
38+
from datafaker.settings import (
39+
Settings,
40+
SettingsError,
41+
get_destination_dsn,
42+
get_destination_schema,
43+
get_source_dsn,
44+
get_source_schema,
45+
)
3846
from datafaker.utils import (
3947
CONFIG_SCHEMA_PATH,
4048
conf_logger,
@@ -59,6 +67,19 @@
5967
app = Typer(no_args_is_help=True)
6068

6169

70+
def datafaker() -> None:
71+
"""Run the app and catch internal exceptions."""
72+
try:
73+
app()
74+
except OperationalError as exc:
75+
logger.error(str(exc))
76+
# Outside of app() typer.Exit(1) doesn't work
77+
sys.exit(1)
78+
except SettingsError as exc:
79+
logger.error(str(exc))
80+
sys.exit(1)
81+
82+
6283
def _check_file_non_existence(file_path: Path) -> None:
6384
"""Check that a given file does not exist. Exit with an error message if it does."""
6485
if file_path.exists():
@@ -294,9 +315,6 @@ def make_vocab(
294315
Example:
295316
$ datafaker make-vocab --config-file config.yml
296317
"""
297-
settings = get_settings()
298-
_require_src_db_dsn(settings)
299-
300318
generator_config = read_config_file(config_file) if config_file is not None else {}
301319
orm_metadata = load_metadata(orm_file, generator_config)
302320
make_vocabulary_tables(
@@ -331,11 +349,12 @@ def make_stats(
331349

332350
config = read_config_file(config_file) if config_file is not None else {}
333351

334-
settings = get_settings()
335-
src_dsn: str = _require_src_db_dsn(settings)
336-
337352
src_stats = asyncio.get_event_loop().run_until_complete(
338-
make_src_stats(src_dsn, config, settings.src_schema)
353+
make_src_stats(
354+
get_source_dsn(),
355+
config,
356+
get_source_schema(),
357+
)
339358
)
340359
stats_file_path.write_text(yaml.dump(src_stats), encoding="utf-8")
341360
logger.debug("%s created.", stats_file)
@@ -369,10 +388,11 @@ def make_tables(
369388
if not force:
370389
_check_file_non_existence(orm_file_path)
371390

372-
settings = get_settings()
373-
src_dsn: str = _require_src_db_dsn(settings)
374-
375-
content = make_tables_file(src_dsn, settings.src_schema, parquet_dir)
391+
content = make_tables_file(
392+
get_source_dsn(),
393+
get_source_schema(),
394+
parquet_dir,
395+
)
376396
orm_file_path.write_text(content, encoding="utf-8")
377397
logger.debug("%s created.", orm_file)
378398

@@ -386,8 +406,6 @@ def configure_tables(
386406
) -> None:
387407
"""Interactively set tables to ignored, vocabulary or primary private."""
388408
logger.debug("Configuring tables in %s.", config_file)
389-
settings = get_settings()
390-
src_dsn: str = _require_src_db_dsn(settings)
391409
config_file_path = Path(config_file)
392410
config = {}
393411
if config_file_path.exists():
@@ -397,7 +415,10 @@ def configure_tables(
397415
# we don't pass config here so that no tables are ignored
398416
metadata = load_metadata(orm_file)
399417
config_updated = update_config_tables(
400-
src_dsn, settings.src_schema, metadata, config
418+
get_source_dsn(),
419+
get_source_schema(),
420+
metadata,
421+
config,
401422
)
402423
if config_updated is None:
403424
logger.debug("Cancelled")
@@ -416,8 +437,6 @@ def configure_missing(
416437
) -> None:
417438
"""Interactively set the missingness of the generated data."""
418439
logger.debug("Configuring missingness in %s.", config_file)
419-
settings = get_settings()
420-
src_dsn: str = _require_src_db_dsn(settings)
421440
config_file_path = Path(config_file)
422441
config: dict[str, Any] = {}
423442
if config_file_path.exists():
@@ -427,7 +446,12 @@ def configure_missing(
427446
if isinstance(config_any, dict):
428447
config = config_any
429448
metadata = load_metadata(orm_file, config)
430-
config_updated = update_missingness(src_dsn, settings.src_schema, metadata, config)
449+
config_updated = update_missingness(
450+
get_source_dsn(),
451+
get_source_schema(),
452+
metadata,
453+
config,
454+
)
431455
if config_updated is None:
432456
logger.debug("Cancelled")
433457
return
@@ -452,8 +476,6 @@ def configure_generators(
452476
) -> None:
453477
"""Interactively set generators for column data."""
454478
logger.debug("Configuring generators in %s.", config_file)
455-
settings = get_settings()
456-
src_dsn: str = _require_src_db_dsn(settings)
457479
config_file_path = Path(config_file)
458480
config = {}
459481
if config_file_path.exists():
@@ -462,7 +484,11 @@ def configure_generators(
462484
)
463485
metadata = load_metadata(orm_file)
464486
config_updated = update_config_generators(
465-
src_dsn, settings.src_schema, metadata, config, spec_path=spec
487+
get_source_dsn(),
488+
get_source_schema(),
489+
metadata,
490+
config,
491+
spec_path=spec,
466492
)
467493
if config_updated is None:
468494
logger.debug("Cancelled")
@@ -576,10 +602,8 @@ def dump_data(
576602
" specified, or specify an existing directory"
577603
)
578604
sys.exit(1)
579-
settings = get_settings()
580-
dst_dsn: str = settings.dst_dsn or ""
581-
assert dst_dsn != "", "Missing DST_DSN setting."
582-
schema_name = settings.dst_schema
605+
dst_dsn = get_destination_dsn()
606+
schema_name = get_destination_schema()
583607
config = read_config_file(config_file) if config_file is not None else {}
584608
metadata = load_metadata_for_output(orm_file, config)
585609
mtables = convert_table_names_to_tables(table, metadata)
@@ -677,7 +701,12 @@ def remove_tables(
677701
else:
678702
config = read_config_file(config_file)
679703
metadata = load_metadata_for_output(orm_file, config)
680-
remove_db_tables(metadata)
704+
try:
705+
remove_db_tables(metadata)
706+
except InternalError as exc:
707+
logger.error("Failed to drop tables: %s", exc)
708+
logger.error("Please try again using the --all option.")
709+
sys.exit(1)
681710
logger.debug("Tables dropped.")
682711
else:
683712
logger.info("Would remove tables if called with --yes.")
@@ -727,4 +756,4 @@ def version() -> None:
727756

728757

729758
if __name__ == "__main__":
730-
app()
759+
datafaker()

datafaker/make.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
from datafaker import providers
2626
from datafaker.parquet2orm import get_parquet_orm
27-
from datafaker.settings import get_settings
27+
from datafaker.settings import get_source_dsn, get_source_schema
2828
from datafaker.utils import (
2929
MaybeAsyncEngine,
3030
create_db_engine,
@@ -453,11 +453,12 @@ def _get_provider_for_column(column: Column) -> Tuple[list[str], str, dict[str,
453453
if not generator_function:
454454
generator_function = "generic.null_provider.null"
455455
logger.warning(
456-
"Unsupported SQLAlchemy type %s for column %s. "
456+
"Unsupported SQLAlchemy type %s for column %s of table %s. "
457457
"Setting this column to NULL always, "
458458
"you may want to configure a row generator for it instead.",
459459
column.type,
460460
column.name,
461+
column.table.name,
461462
)
462463

463464
return variable_names, generator_function, generator_arguments
@@ -551,11 +552,12 @@ def make_vocabulary_tables(
551552
table_names: set[str] | None = None,
552553
) -> None:
553554
"""Extract the data from the source database for each vocabulary table."""
554-
settings = get_settings()
555-
src_dsn: str = settings.src_dsn or ""
556-
assert src_dsn != "", "Missing SRC_DSN setting."
557-
558-
engine = get_sync_engine(create_db_engine(src_dsn, schema_name=settings.src_schema))
555+
engine = get_sync_engine(
556+
create_db_engine(
557+
get_source_dsn(),
558+
schema_name=get_source_schema(),
559+
)
560+
)
559561
vocab_names = get_vocabulary_table_names(config)
560562
if table_names is None:
561563
table_names = vocab_names

datafaker/remove.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from sqlalchemy import MetaData, delete
55

6-
from datafaker.settings import get_settings
6+
from datafaker.settings import get_destination_dsn, get_destination_schema
77
from datafaker.utils import (
88
create_db_engine,
99
get_sync_engine,
@@ -17,10 +17,11 @@
1717

1818
def remove_db_data(metadata: MetaData, config: Mapping[str, Any]) -> None:
1919
"""Truncate the synthetic data tables but not the vocabularies."""
20-
settings = get_settings()
21-
assert settings.dst_dsn, "Missing destination database settings"
2220
remove_db_data_from(
23-
metadata, config, settings.dst_dsn, schema_name=settings.dst_schema
21+
metadata,
22+
config,
23+
get_destination_dsn(),
24+
schema_name=get_destination_schema(),
2425
)
2526

2627

@@ -41,10 +42,11 @@ def remove_db_vocab(
4142
metadata: MetaData, meta_dict: Mapping[str, Any], config: Mapping[str, Any]
4243
) -> None:
4344
"""Truncate the vocabulary tables."""
44-
settings = get_settings()
45-
assert settings.dst_dsn, "Missing destination database settings"
4645
dst_engine = get_sync_engine(
47-
create_db_engine(settings.dst_dsn, schema_name=settings.dst_schema)
46+
create_db_engine(
47+
get_destination_dsn(),
48+
schema_name=get_destination_schema(),
49+
)
4850
)
4951

5052
with dst_engine.connect() as dst_conn:
@@ -58,10 +60,11 @@ def remove_db_vocab(
5860

5961
def remove_db_tables(metadata: Optional[MetaData]) -> None:
6062
"""Drop the tables in the destination schema."""
61-
settings = get_settings()
62-
assert settings.dst_dsn, "Missing destination database settings"
6363
dst_engine = get_sync_engine(
64-
create_db_engine(settings.dst_dsn, schema_name=settings.dst_schema)
64+
create_db_engine(
65+
get_destination_dsn(),
66+
schema_name=get_destination_schema(),
67+
)
6568
)
6669
if metadata is None:
6770
metadata = MetaData()

0 commit comments

Comments (0)