Skip to content

Commit 7ee6ea4

Browse files
committed
Improved function to generate sample data
1 parent b53968e commit 7ee6ea4

1 file changed

Lines changed: 240 additions & 34 deletions

File tree

resources/generate_sample_data.py

Lines changed: 240 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
import random
99
from datetime import datetime, timedelta
10+
import argparse
11+
import os
12+
import sys
1013

1114
# 한글 데이터
1215
korean_companies = [
@@ -112,40 +115,243 @@
112115
('Storage Locker', 'Furniture'), ('Privacy Screen', 'Furniture')
113116
]
114117

115-
def generate_mssql_customers():
116-
"""MSSQL용 Customers INSERT 문 생성"""
117-
sql = []
118-
119-
for i in range(50):
120-
code = f"'CUST{i+1:03d}'"
121-
company = f"'(주){korean_companies[i]}'" if i < 10 else f"'{korean_companies[i]}'"
122-
name = f"'{korean_names[i]}'"
123-
email = f"'{korean_names[i].replace(' ', '').lower()}@{korean_companies[i].replace(' ', '').lower()}.co.kr'"
124-
phone = f"'02-{random.randint(1000,9999)}-{random.randint(1000,9999)}'"
125-
city = f"'{korean_cities[i % len(korean_cities)]}'"
126-
region = f"'{korean_regions[i % len(korean_regions)]}'"
127-
ctype = random.choice(['Premium', 'Regular', 'VIP'])
128-
credit = random.randint(150, 2000) * 100000
129-
130-
sql.append(f"({code}, {company}, {name}, {email}, {phone}, '서울시 강남구', {city}, {region}, '대한민국', '{ctype}', {credit}.00, 1)")
131-
132-
for i in range(50):
133-
code = f"'CUST{i+51:03d}'"
134-
company = f"'{english_companies[i]}'"
135-
name = f"'{english_names[i]}'"
136-
email = f"'{english_names[i].split()[0].lower()}@{english_companies[i].split()[0].lower()}.com'"
137-
phone = f"'+1-555-{random.randint(1000,9999)}'"
138-
city = f"'{english_cities[i]}'"
139-
ctype = random.choice(['Premium', 'Regular', 'VIP'])
140-
credit = random.randint(200, 2500) * 100000
141-
142-
sql.append(f"({code}, {company}, {name}, {email}, {phone}, 'Address', {city}, 'State', 'USA', '{ctype}', {credit}.00, 1)")
143-
144-
return ",\n".join(sql)
118+
def _fmt_str(s):
119+
return "'" + str(s).replace("'", "''") + "'"
120+
121+
def _fmt_bool(dialect, v):
122+
if dialect == 'postgresql':
123+
return 'TRUE' if v else 'FALSE'
124+
# others accept 1/0, Oracle uses NUMBER(1)
125+
return '1' if v else '0'
126+
127+
def _fmt_dt_str(dt):
128+
return dt.strftime('%Y-%m-%d %H:%M:%S')
129+
130+
def _fmt_datetime(dialect, dt):
    """Render ``dt`` as a timestamp expression for ``dialect``.

    Oracle gets an explicit ``TO_TIMESTAMP`` call with a format mask; every
    other dialect gets the quoted ``YYYY-MM-DD HH:MM:SS`` text.
    """
    literal = _fmt_str(_fmt_dt_str(dt))
    if dialect != 'oracle':
        return literal
    return f"TO_TIMESTAMP({literal}, 'YYYY-MM-DD HH24:MI:SS')"
135+
136+
def _now():
137+
return datetime.utcnow()
138+
139+
def gen_customers(rows):
    """Build ``rows`` synthetic customer records.

    The first ``rows // 2`` records are Korean customers and the remainder
    are English ones. Names, companies and cities are taken from the
    module-level sample lists, cycling with modulo when ``rows`` exceeds a
    list's length.

    Args:
        rows: Total number of customers to generate; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with keys ``code``, ``name``, ``contact``, ``email``,
        ``phone``, ``address``, ``city``, ``region``, ``country``, ``ctype``,
        ``credit`` and ``active``. Codes run consecutively from ``CUST001``.
    """
    customer_types = ['Premium', 'Regular', 'VIP']
    data = []

    for i in range(rows // 2):
        company = korean_companies[i % len(korean_companies)]
        contact = korean_names[i % len(korean_names)]
        data.append({
            'code': f"CUST{i + 1:03d}",
            # Only the first ten Korean companies carry the "(주)" prefix.
            'name': ("(주)" if i < 10 else "") + company,
            'contact': contact,
            'email': f"{contact.replace(' ', '').lower()}@{company.replace(' ', '').lower()}.co.kr",
            'phone': f"02-{random.randint(1000, 9999)}-{random.randint(1000, 9999)}",
            'address': '서울시 강남구',
            'city': korean_cities[i % len(korean_cities)],
            'region': korean_regions[i % len(korean_regions)],
            'country': '대한민국',
            'ctype': random.choice(customer_types),
            'credit': float(random.randint(150, 2000) * 100000),
            'active': True,
        })

    # Freeze the offset before appending: reading len(data) inside the loop
    # made the code advance by two per row and skip every other number.
    first_english = len(data)
    for i in range(rows - first_english):
        company = english_companies[i % len(english_companies)]
        contact = english_names[i % len(english_names)]
        data.append({
            'code': f"CUST{first_english + i + 1:03d}",
            'name': company,
            'contact': contact,
            'email': f"{contact.split()[0].lower()}@{company.split()[0].lower()}.com",
            'phone': f"+1-555-{random.randint(1000, 9999)}",
            'address': 'Address',
            'city': english_cities[i % len(english_cities)],
            'region': 'State',
            'country': 'USA',
            'ctype': random.choice(customer_types),
            'credit': float(random.randint(200, 2500) * 100000),
            'active': True,
        })
    return data
172+
173+
def gen_products(rows):
    """Build ``rows`` synthetic product records.

    The first ``rows // 2`` records come from ``korean_products`` and the
    rest from ``english_products``; both lists hold ``(name, category)``
    pairs and are cycled with modulo.

    Args:
        rows: Total number of products to generate; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with keys ``code``, ``name``, ``cat``, ``price``,
        ``stock``, ``onorder``, ``reorder``, ``disc`` and ``desc``.
    """
    def build(code, name, cat):
        # One product row; everything except code/name/category is random.
        return {
            'code': code, 'name': name, 'cat': cat,
            'price': round(random.uniform(10, 2000), 2),
            'stock': random.randint(0, 500), 'onorder': random.randint(0, 200),
            'reorder': random.randint(0, 50), 'disc': False,
            'desc': None,
        }

    korean_count = max(0, rows // 2)
    data = []
    for i in range(korean_count):
        name, cat = korean_products[i % len(korean_products)]
        data.append(build(f"P-{100 + i}", name, cat))

    # English codes start at P-200 as before, but move up when the Korean
    # half already reaches that range (rows > 200), which used to produce
    # duplicate product codes.
    english_start = max(200, 100 + korean_count)
    for i in range(rows - len(data)):
        name, cat = english_products[i % len(english_products)]
        data.append(build(f"P-{english_start + i}", name, cat))
    return data
194+
195+
def gen_employees(rows):
    """Build ``rows`` synthetic employee records.

    Args:
        rows: Number of employees to generate; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with keys ``code``, ``first``, ``last``, ``title``,
        ``birth``, ``hire``, ``email``, ``phone``, ``dept``, ``salary``,
        ``reports`` and ``active``. ``birth`` and ``hire`` are ``date``
        objects. ``reports`` is ``None`` for the first employee and
        otherwise the 1-based position of an earlier employee.
    """
    first_names = ['Alice', 'Brian', 'Cathy', 'David', 'Evan', 'Fiona', 'George', 'Hanna', 'Ian', 'Julia']
    last_names = ['Kim', 'Lee', 'Park', 'Choi', 'Jung', 'Kang', 'Yoon', 'Lim', 'Song', 'Han']
    titles = ['Manager', 'Engineer', 'Analyst', 'Assistant', 'Director']
    departments = ['Sales', 'IT', 'Finance', 'HR', 'Marketing']

    data = []
    if rows <= 0:
        return data

    today = _now()  # one reference instant for the whole batch
    for i in range(rows):
        first = random.choice(first_names)
        last = random.choice(last_names)
        hire = today - timedelta(days=random.randint(0, 3650))
        # Derive the birth date from the hire date (18 to 60 years earlier).
        # Drawing it independently from 1970 onwards produced employees born
        # after they were hired.
        birth = hire - timedelta(days=random.randint(18 * 365, 60 * 365))
        data.append({
            'code': f"E-{i + 1:03d}", 'first': first, 'last': last,
            'title': random.choice(titles),
            'birth': birth.date(), 'hire': hire.date(),
            'email': f"{first.lower()}.{last.lower()}@example.com",
            'phone': f"010-{random.randint(1000, 9999)}-{random.randint(1000, 9999)}",
            'dept': random.choice(departments),
            'salary': round(random.uniform(3000, 9000), 2),
            # Only point at rows generated earlier in this batch.
            'reports': None if i == 0 else random.randint(1, i),
            'active': True,
        })
    return data
215+
216+
def gen_orders(rows, customers_count, employees_count):
    """Build ``rows`` synthetic order headers.

    Order dates fall within 60 days (plus a random time of day) after
    2024-01-01. Tax is 10% of the subtotal and the total is their sum.

    Args:
        rows: Number of orders to generate; values <= 0 yield ``[]``.
        customers_count: Highest customer id that may be referenced.
        employees_count: Highest employee id that may be referenced.

    Returns:
        A list of dicts with keys ``number``, ``customer_id``,
        ``order_date``, ``required_date``, ``shipped_date``, ``status``,
        ``subtotal``, ``tax``, ``total``, ``pay_method``, ``pay_status``,
        ``emp_id`` and ``notes``. ``shipped_date`` is ``None`` exactly when
        ``status`` is ``'Pending'``.
    """
    start = datetime(2024, 1, 1)
    # Clamp both upper bounds the same way; randint(1, 0) raises ValueError.
    max_customer = max(1, customers_count)
    max_employee = max(1, employees_count)

    data = []
    for i in range(rows):
        odt = start + timedelta(
            days=random.randint(0, 60),
            hours=random.randint(0, 23),
            minutes=random.randint(0, 59),
        )
        # Roughly 30% of the orders have not shipped yet.
        if random.random() < 0.3:
            ship = None
            status = 'Pending'
        else:
            ship = odt + timedelta(days=random.randint(1, 7))
            # A status that implies shipment only goes with a ship date.
            status = random.choice(['Shipped', 'Delivered'])
        req = odt + timedelta(days=random.randint(1, 10))
        subtotal = round(random.uniform(50, 5000), 2)
        tax = round(subtotal * 0.1, 2)
        total = round(subtotal + tax, 2)
        data.append({
            'number': f"SO-2024{i + 1:05d}",
            'customer_id': random.randint(1, max_customer),
            'order_date': odt,
            'required_date': req,
            'shipped_date': ship,
            'status': status,
            'subtotal': subtotal,
            'tax': tax,
            'total': total,
            'pay_method': random.choice(['Card', 'Wire', 'Cash']),
            'pay_status': random.choice(['Unpaid', 'Paid']),
            'emp_id': random.randint(1, max_employee),
            'notes': None,
        })
    return data
242+
243+
def gen_order_details(rows, orders_count, products_count):
    """Build ``rows`` synthetic order-line records.

    Each line references a random order id in ``1..orders_count`` and a
    random product id in ``1..products_count``.

    Returns:
        A list of dicts with keys ``order_id``, ``product_id``,
        ``unit_price``, ``qty`` and ``discount``.
    """
    discount_choices = [0, 0, 0, 5, 10]

    def make_line():
        # Quantity and price are drawn before the ids, as in the original.
        quantity = random.randint(1, 5)
        unit_price = round(random.uniform(5, 2000), 2)
        return {
            'order_id': random.randint(1, orders_count),
            'product_id': random.randint(1, products_count),
            'unit_price': unit_price,
            'qty': quantity,
            'discount': round(random.choice(discount_choices), 2)
        }

    return [make_line() for _ in range(rows)]
256+
257+
def render_inserts(dialect, table, rows):
    """Render one ``INSERT`` statement per record for the given table.

    Args:
        dialect: Target dialect name. It affects boolean literals
            (``postgresql``), date/timestamp expressions (``oracle``) and
            Unicode string literals (``mssql``).
        table: One of ``customers``, ``products``, ``employees``,
            ``orders`` or ``orderdetails``.
        rows: Record dicts as produced by the matching ``gen_*`` function.

    Returns:
        A list of SQL statements, one per record. An unrecognised ``table``
        yields an empty list.
    """
    def text(value):
        # Quoted string literal. SQL Server only treats a literal as Unicode
        # when it carries the N prefix; without it non-ASCII text (the Korean
        # sample data) goes through the database code page and can be lost.
        literal = _fmt_str(value)
        if dialect == 'mssql' and any(ord(ch) > 127 for ch in str(value)):
            return 'N' + literal
        return literal

    def text_or_null(value):
        return 'NULL' if value is None else text(value)

    def money(value):
        return f"{value:.2f}"

    def flag(value):
        return _fmt_bool(dialect, value)

    def day(value):
        # Date-only literal; Oracle needs an explicit TO_DATE conversion.
        literal = _fmt_str(value.strftime('%Y-%m-%d'))
        if dialect == 'oracle':
            return f"TO_DATE({literal}, 'YYYY-MM-DD')"
        return literal

    def moment_or_null(value):
        return 'NULL' if value is None else _fmt_datetime(dialect, value)

    def customer_values(r):
        return [
            text(r['code']), text(r['name']), text(r['contact']), text(r['email']), text(r['phone']),
            text(r['address']), text(r['city']), text(r['region']), text(r['country']), text(r['ctype']),
            money(r['credit']), flag(r['active']),
        ]

    def product_values(r):
        return [
            text(r['code']), text(r['name']), text(r['cat']), money(r['price']),
            str(r['stock']), str(r['onorder']), str(r['reorder']), flag(r['disc']),
            text_or_null(r['desc']),
        ]

    def employee_values(r):
        return [
            text(r['code']), text(r['first']), text(r['last']), text(r['title']),
            day(r['birth']), day(r['hire']), text(r['email']), text(r['phone']),
            text(r['dept']), money(r['salary']),
            'NULL' if r['reports'] is None else str(r['reports']), flag(r['active']),
        ]

    def order_values(r):
        return [
            text(r['number']), str(r['customer_id']),
            _fmt_datetime(dialect, r['order_date']),
            _fmt_datetime(dialect, r['required_date']),
            moment_or_null(r['shipped_date']),
            text(r['status']), money(r['subtotal']), money(r['tax']), money(r['total']),
            text(r['pay_method']), text(r['pay_status']), str(r['emp_id']),
            # Notes used to be emitted as NULL even when the record had a value.
            text_or_null(r.get('notes')),
        ]

    def order_detail_values(r):
        return [
            str(r['order_id']), str(r['product_id']), money(r['unit_price']),
            str(r['qty']), money(r['discount']),
        ]

    layouts = {
        'customers': (
            'Customers',
            '(CustomerCode, CustomerName, ContactName, Email, Phone, Address, City, Region, Country, CustomerType, CreditLimit, IsActive)',
            customer_values,
        ),
        'products': (
            'Products',
            '(ProductCode, ProductName, Category, UnitPrice, UnitsInStock, UnitsOnOrder, ReorderLevel, Discontinued, Description)',
            product_values,
        ),
        'employees': (
            'Employees',
            '(EmployeeCode, FirstName, LastName, Title, BirthDate, HireDate, Email, Phone, Department, Salary, ReportsTo, IsActive)',
            employee_values,
        ),
        'orders': (
            'Orders',
            '(OrderNumber, CustomerID, OrderDate, RequiredDate, ShippedDate, OrderStatus, SubTotal, TaxAmount, TotalAmount, PaymentMethod, PaymentStatus, EmployeeID, Notes)',
            order_values,
        ),
        'orderdetails': (
            'OrderDetails',
            '(OrderID, ProductID, UnitPrice, Quantity, Discount)',
            order_detail_values,
        ),
    }

    if table not in layouts:
        return []
    sql_table, cols, to_values = layouts[table]
    return [
        f"INSERT INTO {sql_table} {cols} VALUES (" + ", ".join(to_values(r)) + ");"
        for r in rows
    ]
305+
306+
def main(argv=None):
    """Generate sample-data INSERT statements and write them to a SQL file.

    Command-line options:
        --dialect / --db: target dialect (mssql, mysql, postgresql, oracle,
            sqlite). The synonyms postgres, pg, maria and mariadb are
            accepted too.
        --tables: comma-separated subset of customers, products, employees,
            orders, orderdetails.
        --rows: base row count (default 100).
        --output: destination file; defaults to
            ``sample_data_<dialect>.sql`` next to this script.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    The path of the written file is printed to stdout.
    """
    known_tables = ('customers', 'products', 'employees', 'orders', 'orderdetails')
    labels = {
        'customers': 'Customers', 'products': 'Products', 'employees': 'Employees',
        'orders': 'Orders', 'orderdetails': 'OrderDetails',
    }
    alias_map = {
        'postgres': 'postgresql', 'pg': 'postgresql',
        'maria': 'mysql', 'mariadb': 'mysql'
    }

    def normalize_dialect(value):
        # argparse applies `type` before checking `choices`, so the synonyms
        # are mapped here. Mapping after parse_args() never ran for them
        # because `choices` had already rejected the value.
        key = value.strip().lower()
        return alias_map.get(key, key)

    parser = argparse.ArgumentParser()
    parser.add_argument('--dialect', '--db', dest='dialect', type=normalize_dialect,
                        choices=['mssql', 'mysql', 'postgresql', 'oracle', 'sqlite'], default='mssql')
    parser.add_argument('--tables', default='customers,products,employees,orders,orderdetails')
    parser.add_argument('--rows', type=int, default=100)
    parser.add_argument('--output', default='')
    args = parser.parse_args(argv)

    dialect = args.dialect
    tables = [t.strip().lower() for t in args.tables.split(',') if t.strip()]
    rows = args.rows

    # Fail loudly on input that used to produce a silently incomplete file.
    unknown = [t for t in tables if t not in known_tables]
    if unknown:
        parser.error("unknown table(s): " + ", ".join(unknown)
                     + " (choose from " + ", ".join(known_tables) + ")")
    if rows < 0:
        parser.error("--rows must be zero or greater")

    # Every data set is generated even when it is not written out, because
    # orders and order details reference the customer/employee/product counts.
    customers = gen_customers(rows)
    products = gen_products(rows)
    employees = gen_employees(min(100, max(10, rows // 2)))
    orders = gen_orders(min(100, rows), len(customers), len(employees))
    orderdetails = gen_order_details(rows * 2, len(orders), len(products))
    generated = {
        'customers': customers, 'products': products, 'employees': employees,
        'orders': orders, 'orderdetails': orderdetails,
    }

    # Emit in the fixed order above, regardless of the order given on the
    # command line.
    all_lines = []
    for key in known_tables:
        if key not in tables:
            continue
        records = generated[key]
        all_lines.append(f"-- {labels[key]} ({len(records)} records)")
        all_lines.extend(render_inserts(dialect, key, records))

    output = "\n".join(all_lines) + "\n"
    out_path = args.output
    if not out_path:
        script_dir = os.path.dirname(__file__)
        out_path = os.path.join(script_dir, f"sample_data_{dialect}.sql")
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(output)
    sys.stdout.write(out_path + "\n")
145354

146355
if __name__ == "__main__":
147-
print("-- Customers (100 records)")
148-
print("INSERT INTO dbo.Customers (CustomerCode, CustomerName, ContactName, Email, Phone, Address, City, Region, Country, CustomerType, CreditLimit, IsActive) VALUES")
149-
print(generate_mssql_customers())
150-
print("GO\n")
356+
main()
151357

0 commit comments

Comments
 (0)