Skip to content

Commit 3c1c9aa

Browse files
author
Tim Band
committed
Initial attempt at a static version of df.py
1 parent 2306b12 commit 3c1c9aa

1 file changed

Lines changed: 189 additions & 0 deletions

File tree

datafaker/populate.py

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
"""This file was auto-generated by datafaker but can be edited manually."""
2+
from collections.abc import Iterable, Mapping, MutableMapping, Sequence
3+
from mimesis import Generic, Numeric, Person
4+
from mimesis.locales import Locale
5+
import sqlalchemy
6+
import sys
7+
from typing import Any, Callable
8+
import yaml
9+
10+
from datafaker.base import FileUploader, TableGenerator, DistributionGenerator, ColumnPresence
11+
from datafaker.main import load_metadata
12+
from datafaker.make import TableGeneratorInfo, StoryGeneratorInfo #TODO: move these in here!
13+
14+
from datafaker.providers import (
15+
BytesProvider,
16+
ColumnValueProvider,
17+
NullProvider,
18+
SQLGroupByProvider,
19+
TimedeltaProvider,
20+
TimespanProvider,
21+
WeightedBooleanProvider,
22+
)
23+
from datafaker.utils import logging, get_vocabulary_table_names
24+
25+
# Shared generator objects made available to the generator functions.
generic = Generic(locale=Locale.EN_GB)
numeric = Numeric()
person = Person()
dist_gen = DistributionGenerator()
column_presence = ColumnPresence()

# Register the datafaker providers on the shared ``generic`` object,
# in the same order as before.
generic.add_providers(
    BytesProvider,
    ColumnValueProvider,
    NullProvider,
    SQLGroupByProvider,
    TimedeltaProvider,
    TimespanProvider,
    WeightedBooleanProvider,
)
38+
39+
#metadata = load_metadata("{{ orm_file_name }}", "{{ config_file_name }}")
40+
41+
#import {{ row_generator_module_name }}
42+
#import {{ story_generator_module_name }}
43+
44+
def _eval_structure(config: Any, context: Mapping) -> Any:
45+
"""
46+
Turn a structure from ``config.yaml`` into a Python object.
47+
48+
:param config: a structure (list, dict, number or expression in a string).
49+
:return: Object matching the structure of ``config`` with strings eval'ed.
50+
"""
51+
if isinstance(config, str):
52+
return eval(config, locals=context)
53+
if isinstance(config, Mapping):
54+
return {
55+
k: _eval_structure(v, context)
56+
for k, v in config.items()
57+
}
58+
if isinstance(config, Sequence):
59+
return [_eval_structure(v, context) for v in config]
60+
return config
61+
62+
63+
def _get_object(class_name: Any, context: Mapping) -> Any:
64+
"""
65+
Get an object out of the context.
66+
67+
:param class_name: The name of the class, qualified if necessary.
68+
Like "module.MyClass.Nested"
69+
:param context: Mapping of strings to objects with those names.
70+
:return: A value from ``context`` if there are no qualifying names,
71+
otherwise the attribute of the base object.
72+
"""
73+
if not isinstance(class_name, str):
74+
return None
75+
if not isinstance(kwargs, Mapping):
76+
kwargs = {}
77+
parts = class_name.split(".")
78+
if parts[0] not in context:
79+
logging.error('No such object "%"', parts[0])
80+
return None
81+
value = context[parts[0]]
82+
so_far = parts[0]
83+
for part in parts[1:]:
84+
so_far += "." + part
85+
if not hasattr(value, part):
86+
logging.error('No such attribute "%"', so_far)
87+
return None
88+
value = getattr(value, part)
89+
return value
90+
91+
92+
def _call_from_context(callable_name: Any, kwargs: Any, context: Mapping) -> Any:
    """
    Call a callable from the classes (or functions) in the context.

    :param callable_name: Possibly qualified name of the class or function
        to call; it is looked up with ``_get_object``.
    :param kwargs: Structure describing the keyword arguments; it is passed
        through ``_eval_structure`` before the call.
    :param context: Mapping of base classes and modules
    :return: The result of the call, or None if the name does not resolve to
        a callable or the evaluated arguments are None.
    """
    target = _get_object(callable_name, context)
    if not isinstance(target, Callable):
        return None
    arguments = _eval_structure(kwargs, context)
    return None if arguments is None else target(**arguments)
107+
108+
109+
def _get_src_stats(src_stats_filename: str) -> Any:
    """
    Get the SRC_STATS object.

    :param src_stats_filename: Path of the YAML file to read.
    :return: Whatever object the YAML document in that file describes.
    """
    # NOTE(security): yaml.unsafe_load can construct arbitrary Python
    # objects, so the file must come from a trusted source.
    with open(src_stats_filename, "r", encoding="utf-8") as f:
        return yaml.unsafe_load(f)
115+
116+
117+
class TableGenerator:
    """
    Generates rows for one table, retrying to satisfy unique constraints.

    Hashes of the values already used for each unique constraint are kept in
    ``existing_constraint_hashes`` (keyed by constraint name) so that newly
    generated rows can be checked against them.
    """

    def __init__(
        self,
        rows_per_pass: int,
        dst_db_conn: sqlalchemy.Connection,
        table_data: TableGeneratorInfo,
        max_unique_constraint_tries: int | None,
    ):
        """
        Initialise the generator and load the existing unique values.

        :param rows_per_pass: Stored as ``num_rows_per_pass``.
        :param dst_db_conn: Connection used to read the values that already
            exist for each unique constraint.
        :param table_data: Description of the table to generate.
        :param max_unique_constraint_tries: Number of generation attempts
            allowed per row; ``None`` means there is no limit.
        """
        self.num_rows_per_pass = rows_per_pass
        self.table_data = table_data
        self.max_unique_constraint_tries = max_unique_constraint_tries
        self.existing_constraint_hashes: MutableMapping[str, set[int]] = {}
        self.context: Mapping = {}
        for constraint in table_data.unique_constraints:
            # select() takes the columns as separate positional arguments.
            expr = sqlalchemy.select(*constraint.columns)
            query_result = dst_db_conn.execute(expr).fetchall()
            self.existing_constraint_hashes[constraint.name] = {
                hash(tuple(row)) for row in query_result
            }

    def set_context(self, context: Mapping):
        """Set the mapping in which generator function names are looked up."""
        self.context = context

    @staticmethod
    def _constraint_hash(constraint: Any, result: Mapping) -> int:
        """Hash the values ``result`` holds for the constraint's columns."""
        return hash(tuple(result[col.name] for col in constraint.columns))

    def __call__(self, dst_db_conn):
        """
        Generate one row.

        :param dst_db_conn: Not used by this method.
        :return: Dict mapping generated variable names to their values.
        :raises RuntimeError: If the unique constraints could not be
            satisfied within ``max_unique_constraint_tries`` attempts.
        """
        result = {}
        columns_to_generate = set(self.table_data.nonnull_columns)
        # Which missingness patterns do we want?
        for choice in self.table_data.column_choices:
            cols = _call_from_context(
                choice.function_name,
                choice.argument_values,
                self.context,
            )
            columns_to_generate.update(cols)

        max_tries = self.max_unique_constraint_tries
        while columns_to_generate:
            if max_tries == 0:
                raise RuntimeError(f"Failed to satisfy unique constraints for table {self.table_data.table_name} after {self.max_unique_constraint_tries} attempts.")
            if max_tries is not None:
                max_tries -= 1
            # (Re)run every row generator that produces a wanted column.
            for row_gen in self.table_data.row_gens:
                if set(row_gen.variable_names) & columns_to_generate:
                    values = _call_from_context(
                        row_gen.function_call.function_name,
                        row_gen.function_call.argument_values,
                        self.context,
                    )
                    for index, variable_name in enumerate(row_gen.variable_names):
                        result[variable_name] = values[index]
            # Columns of any violated constraint must be generated again.
            columns_to_generate = set()
            for constraint in self.table_data.unique_constraints:
                cf_hash = self._constraint_hash(constraint, result)
                if cf_hash in self.existing_constraint_hashes[constraint.name]:
                    columns_to_generate.update(c.name for c in constraint.columns)
        # Record the accepted values so later rows cannot repeat them.
        for constraint in self.table_data.unique_constraints:
            cf_hash = self._constraint_hash(constraint, result)
            self.existing_constraint_hashes[constraint.name].add(cf_hash)
        return result

    def get_table_generator_dict(
        self,
        rows_per_pass: int,
        dst_db_conn: sqlalchemy.Connection,
        tables_data: Iterable[TableGeneratorInfo],
        max_unique_constraint_tries: int | None,
    ):
        """
        Build a ``TableGenerator`` for each table description.

        :param rows_per_pass: Passed to each ``TableGenerator``.
        :param dst_db_conn: Passed to each ``TableGenerator``.
        :param tables_data: Descriptions of the tables to generate.
        :param max_unique_constraint_tries: Passed to each ``TableGenerator``.
        :return: Dict mapping each table's name to its ``TableGenerator``.
        """
        return {
            table_data.table_name: TableGenerator(
                rows_per_pass,
                dst_db_conn,
                table_data,
                max_unique_constraint_tries,
            )
            for table_data in tables_data
        }
174+
175+
176+
def get_vocab_dict(config: Mapping, metadata: sqlalchemy.MetaData) -> Mapping[str, FileUploader]:
    """
    Build the uploaders for the vocabulary tables.

    :param config: Configuration from which the vocabulary table names are
        obtained via ``get_vocabulary_table_names``.
    :param metadata: Metadata whose ``tables`` mapping holds those tables.
    :return: Dict mapping each vocabulary table name to a ``FileUploader``.
    """
    # NOTE(review): assumes FileUploader is constructed from a single table
    # argument -- confirm against datafaker.base.
    return {
        name: FileUploader(metadata.tables[name])
        for name in get_vocabulary_table_names(config)
    }
180+
181+
def get_story_generator_list(story_generator_infos: Iterable[StoryGeneratorInfo], context: Mapping) -> list[Mapping]:
    """
    Build the list of story generator descriptions.

    :param story_generator_infos: Descriptions of the story generators.
    :param context: Mapping in which the story functions are looked up.
    :return: One dict per story generator with the keys ``"function"``
        (the result of calling the named function from ``context``),
        ``"num_stories_per_pass"`` and ``"name"`` (the function's name).
    """
    return [
        {
            "function": _call_from_context(
                gen_data.function_call.function_name,
                gen_data.function_call.argument_values,
                context,
            ),
            "num_stories_per_pass": gen_data.num_stories_per_pass,
            "name": gen_data.function_call.function_name,
        }
        for gen_data in story_generator_infos
    ]

0 commit comments

Comments
 (0)