-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpipeline.py
More file actions
44 lines (32 loc) · 991 Bytes
/
pipeline.py
File metadata and controls
44 lines (32 loc) · 991 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# pipeline.py
import json
import datetime
# Order statuses that process_data will enrich; records with any other
# status are counted as skipped.
STATUS_WHITELIST = [
    "pending",
    "confirmed",
    "shipped",
    "cancelled",
]

# Running counters incremented by process_data. This is a single module-level
# dict, so the counts accumulate across calls within one interpreter session.
stats = dict(processed=0, failed=0, skipped=0)
def enrich(d, *, tax_rate=1.21):
    """Add a tax-inclusive total and a processing timestamp to an order.

    The record is modified in place and also returned, so callers may use
    either the argument or the return value.

    Args:
        d: Order record (a dict) whose ``"amount"`` entry is a number.
        tax_rate: Multiplier applied to ``d["amount"]`` to obtain the
            tax-inclusive total. Defaults to 1.21, the value that was
            previously hard-coded.

    Returns:
        The same dict ``d`` with two entries set:
        ``"total_with_tax"`` (``amount * tax_rate`` rounded to 2 decimals)
        and ``"processed_at"`` (current UTC time as a naive ISO-8601 string).

    Raises:
        KeyError: if ``d`` has no ``"amount"`` entry.
        TypeError: if ``d["amount"]`` cannot be multiplied by a float.
    """
    d["total_with_tax"] = round(d["amount"] * tax_rate, 2)
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and drop its tzinfo so the emitted string keeps the same
    # naive format (no "+00:00" suffix) that consumers already receive.
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    d["processed_at"] = now_utc.replace(tzinfo=None).isoformat()
    return d
def process_data(input_path, output_path):
    """Enrich the valid orders from a JSON file and write them to another.

    Loads a JSON array of order records from ``input_path`` and sorts each
    record into one of three outcomes:

    * failed: the record is not a JSON object, lacks one of ``order_id``,
      ``amount`` or ``status``, or has a whitelisted status but a
      non-numeric ``amount``;
    * skipped: its ``status`` is not listed in ``STATUS_WHITELIST``;
    * processed: it was passed through ``enrich`` and included in the output.

    Only processed records are written, as an indented JSON array, to
    ``output_path``. Failed and skipped records are counted, not stored.

    Args:
        input_path: Path of the JSON file to read (decoded as UTF-8).
        output_path: Path of the JSON file to write; opened in ``"w"`` mode,
            so an existing file is overwritten.

    Returns:
        The module-level ``stats`` dict. The same dict is updated on every
        call, so its counts accumulate across calls.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if the input file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(input_path, "r", encoding="utf-8") as f:
        records = json.load(f)

    required_keys = ("order_id", "amount", "status")
    results = []
    for record in records:
        # A non-object entry (number, string, list, null) can never be a
        # valid order; count it as failed instead of crashing on lookup.
        if not isinstance(record, dict) or any(
            key not in record for key in required_keys
        ):
            stats["failed"] += 1
            continue
        if record["status"] not in STATUS_WHITELIST:
            stats["skipped"] += 1
            continue
        # Checked after the status filter so that records which were always
        # skipped stay skipped; only amounts that would make enrich() raise
        # (e.g. a string or null) are reclassified as failed.
        if not isinstance(record["amount"], (int, float)):
            stats["failed"] += 1
            continue
        results.append(enrich(record))
        stats["processed"] += 1

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    return stats