Skip to content

Commit ed1415f

Browse files
tim-band and Tim Band authored
configure-generators gains "set generator.name" (#51)
and --spec=table-column-gen.csv

Co-authored-by: Tim Band <t.b@ucl>
1 parent 840b72b commit ed1415f

3 files changed

Lines changed: 93 additions & 35 deletions

File tree

datafaker/interactive.py

Lines changed: 72 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
11
from abc import ABC, abstractmethod
22
import cmd
33
from collections.abc import Mapping
4+
import csv
45
from dataclasses import dataclass
56
from enum import Enum
7+
from pathlib import Path
68
from prettytable import PrettyTable
79
import re
810
import sqlalchemy
@@ -1022,7 +1024,7 @@ def _copy_entries(self) -> None:
10221024
self.set_table_config(entry.name, table_config)
10231025
self.config["src-stats"] = src_stats
10241026

1025-
def do_quit(self, _arg):
1027+
def do_quit(self, arg):
10261028
"Check the updates, save them if desired and quit the configurer."
10271029
count = 0
10281030
for entry in self.table_entries:
@@ -1042,7 +1044,10 @@ def do_quit(self, _arg):
10421044
)
10431045
if count == 0:
10441046
self.print("You have made no changes.")
1045-
reply = self.ask_save()
1047+
if arg in {"yes", "no"}:
1048+
reply = arg
1049+
else:
1050+
reply = self.ask_save()
10461051
if reply == "yes":
10471052
self._copy_entries()
10481053
return True
@@ -1112,30 +1117,34 @@ def _get_generator_index(self, table_index, column_name):
11121117
return n
11131118
return None
11141119

1120+
def go_to(self, target):
1121+
parts = target.split(".", 1)
1122+
table_index = self._get_table_index(parts[0])
1123+
if table_index is None:
1124+
self.print("No such (non-vocabulary, non-ignored) table name {0}", parts[0])
1125+
return False
1126+
gen_index = None
1127+
if 1 < len(parts) and parts[1]:
1128+
gen_index = self._get_generator_index(table_index, parts[1])
1129+
if gen_index is None:
1130+
self.print("we cannot set the generator for column {0}", parts[1])
1131+
return False
1132+
self.set_table_index(table_index)
1133+
if gen_index is not None:
1134+
self.generator_index = gen_index
1135+
self.set_prompt()
1136+
return True
1137+
11151138
def do_next(self, arg):
11161139
"""
11171140
Go to the next generator.
11181141
Or, go to a named table: 'next tablename'.
11191142
Or go to a column: 'next tablename.columnname'.
11201143
"""
11211144
if arg:
1122-
parts = arg.split(".", 1)
1123-
table_index = self._get_table_index(parts[0])
1124-
if table_index is None:
1125-
self.print("No such (non-vocabulary, non-ignored) table name {0}", parts[0])
1126-
return
1127-
gen_index = None
1128-
if 1 < len(parts) and parts[1]:
1129-
gen_index = self._get_generator_index(table_index, parts[1])
1130-
if gen_index is None:
1131-
self.print("we cannot set the generator for column {0}", parts[1])
1132-
return
1133-
self.set_table_index(table_index)
1134-
if gen_index is not None:
1135-
self.generator_index = gen_index
1136-
self.set_prompt()
1137-
return
1138-
self._go_next()
1145+
self.go_to(arg)
1146+
else:
1147+
self._go_next()
11391148

11401149
def do_n(self, arg):
11411150
""" Synonym for next """
@@ -1374,33 +1383,43 @@ def do_p(self, arg):
13741383
""" Synonym for propose """
13751384
self.do_propose(arg)
13761385

1386+
def _get_proposed_generator_by_name(self, gen_name: str) -> Generator | None:
1387+
for gen in self._get_generator_proposals():
1388+
if gen.name() == gen_name:
1389+
return gen
1390+
return None
1391+
13771392
def do_set(self, arg: str):
13781393
"""
13791394
Set one of the proposals as a generator.
13801395
Takes a single integer argument.
13811396
"""
1382-
if not arg.isdigit():
1383-
self.print("set requires a single integer argument; 'set 3' sets the third generator that 'propose' lists.")
1384-
return
1385-
if not self._generators_valid():
1386-
self.print("Please run 'propose' before 'set'")
1397+
if arg.isdigit() and not self._generators_valid():
1398+
self.print("Please run 'propose' before 'set <number>'")
13871399
return
13881400
gens = self._get_generator_proposals()
1389-
index = int(arg)
1390-
if index < 1:
1391-
self.print("set's argument must be at least 1")
1392-
return
1393-
if len(gens) < index:
1394-
self.print("There are currently only {0} generators proposed, please select one of them.", index)
1395-
return
1401+
if arg.isdigit():
1402+
index = int(arg)
1403+
if index < 1:
1404+
self.print("set's integer argument must be at least 1")
1405+
return
1406+
if len(gens) < index:
1407+
self.print("There are currently only {0} generators proposed, please select one of them.", index)
1408+
return
1409+
new_gen = gens[index - 1]
1410+
else:
1411+
new_gen = self._get_proposed_generator_by_name(arg)
1412+
if new_gen is None:
1413+
self.print("'{0}' is not an appropriate generator for this column", arg)
1414+
return
13961415
(table, gen_info) = self.get_table_and_generator()
13971416
if table is None:
13981417
self.print("Error: no table")
13991418
return
14001419
if gen_info is None:
14011420
self.print("Error: no column")
14021421
return
1403-
gen_info.new_gen = gens[index - 1]
1422+
gen_info.new_gen = new_gen
14041423
self._go_next()
14051424

14061425
def do_s(self, arg):
@@ -1421,7 +1440,26 @@ def do_unset(self, _arg):
14211440
gen_info.new_gen = None
14221441
self._go_next()
14231442

1424-
def update_config_generators(src_dsn: str, src_schema: str, metadata: MetaData, config: Mapping):
1443+
1444+
def update_config_generators(
1445+
src_dsn: str,
1446+
src_schema: str,
1447+
metadata: MetaData,
1448+
config: Mapping,
1449+
spec_path: Path | None,
1450+
):
14251451
with GeneratorCmd(src_dsn, src_schema, metadata, config) as gc:
1426-
gc.cmdloop()
1452+
if spec_path is None:
1453+
gc.cmdloop()
1454+
return gc.config
1455+
spec = spec_path.open()
1456+
line_no = 0
1457+
for line in csv.reader(spec):
1458+
line_no += 1
1459+
if line:
1460+
if len(line) != 3:
1461+
logger.error("line {0} of file {1} does not have three values", line_no, spec_path)
1462+
if gc.go_to(f"{line[0]}.{line[1]}"):
1463+
gc.do_set(line[2])
1464+
gc.do_quit("yes")
14271465
return gc.config

datafaker/main.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,7 @@ def configure_missing(
380380
def configure_generators(
381381
config_file: Optional[str] = Option(CONFIG_FILENAME, help="Path of the configuration file to alter"),
382382
orm_file: str = Option(ORM_FILENAME, help="The name of the ORM yaml file"),
383+
spec: Path = Option(None, help="CSV file (headerless) with fields table-name, column-name, generator-name to set non-interactively")
383384
):
384385
"""
385386
Interactively set generators for column data.
@@ -392,7 +393,7 @@ def configure_generators(
392393
if config_file_path.exists():
393394
config = yaml.load(config_file_path.read_text(encoding="UTF-8"), Loader=yaml.SafeLoader)
394395
metadata = load_metadata(orm_file, config)
395-
config_updated = update_config_generators(src_dsn, settings.src_schema, metadata, config)
396+
config_updated = update_config_generators(src_dsn, settings.src_schema, metadata, config, spec_path=spec)
396397
if config_updated is None:
397398
logger.debug("Cancelled")
398399
return

tests/test_interactive.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -438,6 +438,25 @@ def test_set_generator_distribution(self):
438438
f"SELECT AVG({COLUMN}) AS mean__{COLUMN}, STDDEV({COLUMN}) AS stddev__{COLUMN} FROM {TABLE}",
439439
)
440440

441+
def test_set_generator_distribution_directly(self):
442+
""" Test that we can set one generator to gaussian without going through propose. """
443+
with self._get_cmd({}) as gc:
444+
TABLE = "string"
445+
COLUMN = "frequency"
446+
GENERATOR = "dist_gen.normal"
447+
gc.do_next(f"{TABLE}.{COLUMN}")
448+
gc.reset()
449+
gc.do_set(GENERATOR)
450+
self.assertListEqual(gc.messages, [])
451+
gc.do_quit("")
452+
self.assertEqual(len(gc.config["src-stats"]), 1)
453+
self.assertSetEqual(set(gc.config["src-stats"][0].keys()), {"comments", "name", "query"})
454+
self.assertEqual(gc.config["src-stats"][0]["name"], f"auto__{TABLE}")
455+
self.assertEqual(
456+
gc.config["src-stats"][0]["query"],
457+
f"SELECT AVG({COLUMN}) AS mean__{COLUMN}, STDDEV({COLUMN}) AS stddev__{COLUMN} FROM {TABLE}",
458+
)
459+
441460
def test_set_generator_choice(self):
442461
""" Test that we can set one generator to uniform choice. """
443462
with self._get_cmd({}) as gc:

0 commit comments

Comments
 (0)