Skip to content

Commit 5172237

Browse files
authored
feat: add MIG hierarchy materialized view and diff view (#237)
1 parent 564ece2 commit 5172237

9 files changed

Lines changed: 2767 additions & 0 deletions

File tree

README.md

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,84 @@ ORDER BY sort_path;
249249

250250
</details>
251251

252+
#### Befüllen einer Datenbank mit MIG-Informationen
253+
Analog zu den AHBs lassen sich auch MIGs in eine Datenbank überführen und "flach" ziehen.
254+
Da MIGs die vollständige Nachrichtenstruktur beschreiben (Segmentgruppen, Segmente, Datenelementgruppen, Datenelemente und Codes), ist die Hierarchie oft tiefer als bei AHBs.
255+
256+
```python
257+
# pip install fundamend[sqlmodels]
258+
from pathlib import Path
259+
from fundamend.sqlmodels import create_db_and_populate_with_mig_view, MigHierarchyMaterialized
260+
from sqlmodel import Session, create_engine, select
261+
262+
mig_paths = [
263+
Path("UTILTS_MIG_1.1c_Lesefassung_2023_12_12.xml"),
264+
# weitere MIG XML-Dateien hier hinzufügen
265+
]
266+
sqlite_file = create_db_and_populate_with_mig_view(mig_paths)
267+
engine = create_engine(f"sqlite:///{sqlite_file}")
268+
with Session(bind=engine) as session:
269+
stmt = select(MigHierarchyMaterialized).where(MigHierarchyMaterialized.format == "UTILTS").order_by(
270+
MigHierarchyMaterialized.sort_path
271+
)
272+
results = session.exec(stmt).all()
273+
```
274+
oder in plain SQL:
275+
```sql
276+
-- sqlite dialect
277+
SELECT path,
278+
type,
279+
segmentgroup_id,
280+
segment_id,
281+
segment_name,
282+
dataelement_id,
283+
dataelement_name,
284+
code_value,
285+
code_name,
286+
line_status_std,
287+
line_status_specification
288+
FROM mig_hierarchy_materialized
289+
WHERE format = 'UTILTS'
290+
ORDER BY sort_path;
291+
```
292+
293+
<details>
294+
<summary>Finde heraus, welche Zeilen in einem MIG zwischen zwei Versionen hinzugefügt, gelöscht oder geändert wurden</summary>
295+
<br>
296+
297+
Dafür gibt es die View `v_mig_diff`, die mit `create_mig_diff_view(session)` erstellt werden kann:
298+
```python
299+
from fundamend.sqlmodels import create_mig_diff_view
300+
create_mig_diff_view(session)
301+
```
302+
303+
Die View erwartet beim Abfragen 4 Filter-Parameter (`old_format_version`, `new_format_version`, `old_format`, `new_format`) und liefert einen `diff_status`:
304+
- `added`: Zeile existiert in der neuen Version, aber nicht in der alten
305+
- `deleted`: Zeile existiert in der alten Version, aber nicht in der neuen
306+
- `modified`: Zeile existiert in beiden Versionen, aber mit unterschiedlichen Werten (bei `modified` enthält `changed_columns` die Liste der geänderten Spalten)
307+
- `unchanged`: Zeile ist in beiden Versionen identisch
308+
309+
Alle Wert-Spalten existieren doppelt (`old_*` und `new_*`), um die Werte aus beiden Versionen nebeneinander anzuzeigen.
310+
311+
**Matching-Strategie:** Diese View ordnet Zeilen anhand ihrer `id_path`-Spalte einander zu, die semantische Qualifier verwendet (z.B. `SG2>SG3>FTX+ACD>C_C107>D_4441>`), um Zeilen über Versionen hinweg zu identifizieren. Das ist konsistent mit der AHB-Diff-View.
312+
313+
```sql
314+
-- Alle Änderungen zwischen zwei MIG-Versionen anzeigen
315+
SELECT path, diff_status, changed_columns,
316+
old_line_status_std, new_line_status_std,
317+
old_line_status_specification, new_line_status_specification,
318+
old_line_name, new_line_name
319+
FROM v_mig_diff
320+
WHERE old_format_version = 'FV2504'
321+
AND new_format_version = 'FV2510'
322+
AND old_format = 'IFTSTA'
323+
AND new_format = 'IFTSTA'
324+
AND diff_status != 'unchanged'
325+
ORDER BY sort_path;
326+
```
327+
328+
</details>
329+
252330
### CLI Tool für XML➡️JSON Konvertierung
253331
Mit
254332
```bash

domain-specific-terms.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ rekursive
1818
finde
1919
contrl
2020
Elemente
21+
segmente
22+
hierarchie

src/fundamend/sqlmodels/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
MigSegmentGroup,
3838
MigSegmentGroupLink,
3939
)
40+
from .mig_diff_view import MigDiffLine, create_mig_diff_view
41+
from .migview import MigHierarchyMaterialized, create_db_and_populate_with_mig_view, create_mig_view
4042

4143
__all__ = [
4244
"create_ahb_view",
@@ -62,4 +64,9 @@
6264
"MigSegment",
6365
"MigSegmentGroup",
6466
"MigSegmentGroupLink",
67+
"create_mig_view",
68+
"MigHierarchyMaterialized",
69+
"create_db_and_populate_with_mig_view",
70+
"create_mig_diff_view",
71+
"MigDiffLine",
6572
]
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
-- Assume that materialize_mig_view.sql has been executed already.
2+
-- This view allows comparing two MIG versions to find added, deleted, and modified rows.
3+
--
4+
-- IMPORTANT: This view contains the diff rows of every (old, new) version pair of the same format (old < new), not just one comparison. You MUST filter by old/new format version and old/new format.
5+
--
6+
-- Usage for comparing FV2410 -> FV2504 for UTILTS format:
7+
-- SELECT * FROM v_mig_diff
8+
-- WHERE old_format_version = 'FV2410'
9+
-- AND old_format = 'UTILTS'
10+
-- AND new_format_version = 'FV2504'
11+
-- AND new_format = 'UTILTS'
12+
-- ORDER BY sort_path;
13+
--
14+
-- diff_status can be: 'added', 'deleted', 'modified', 'unchanged'
15+
-- The view compares line_status_std, line_status_specification, and line_name to determine modifications.
16+
--
17+
-- For deleted rows, old_ columns are populated and new_ columns are NULL (except new_format_version and new_format, which stay populated so the row can be filtered).
18+
-- For added rows, new_ columns are populated and old_ columns are NULL (except old_format_version and old_format, which stay populated so the row can be filtered).
19+
--
20+
-- MATCHING STRATEGY:
21+
-- This view matches rows by their id_path column, which uses semantic qualifiers
22+
-- (e.g., "SG2>SG3>FTX+ACD>C_C107>D_4441>") to identify rows across versions.
23+
-- This is consistent with how the AHB diff view works.
24+
-- Note: SQLite's "IS NOT" is a NULL-safe inequality operator (equivalent to SQL standard "IS DISTINCT FROM")
25+
26+
DROP TABLE IF EXISTS v_mig_diff;
27+
DROP VIEW IF EXISTS v_mig_diff;
28+
29+
CREATE VIEW v_mig_diff AS
30+
WITH version_pairs AS (SELECT DISTINCT old_v.edifact_format_version AS old_format_version,
31+
old_v.format AS old_format,
32+
new_v.edifact_format_version AS new_format_version,
33+
new_v.format AS new_format
34+
FROM (SELECT DISTINCT edifact_format_version, format FROM mig_hierarchy_materialized) old_v
35+
JOIN (SELECT DISTINCT edifact_format_version, format
36+
FROM mig_hierarchy_materialized) new_v
37+
ON old_v.format = new_v.format
38+
WHERE old_v.edifact_format_version < new_v.edifact_format_version),
39+
40+
-- Pre-compute changed_columns once, derive diff_status from it
41+
modified_check AS (SELECT TRIM(
42+
CASE
43+
WHEN old_tbl.line_status_std IS NOT new_tbl.line_status_std
44+
THEN 'line_status_std, '
45+
ELSE '' END ||
46+
CASE
47+
WHEN old_tbl.line_status_specification IS NOT new_tbl.line_status_specification
48+
THEN 'line_status_specification, '
49+
ELSE '' END ||
50+
CASE
51+
WHEN old_tbl.line_name IS NOT new_tbl.line_name
52+
THEN 'line_name'
53+
ELSE '' END
54+
, ', ') AS changed_columns,
55+
new_tbl.id_path AS id_path,
56+
new_tbl.sort_path AS sort_path,
57+
new_tbl.path AS path,
58+
new_tbl.type AS line_type,
59+
old_tbl.edifact_format_version AS old_format_version,
60+
old_tbl.format AS old_format,
61+
old_tbl.segmentgroup_id AS old_segmentgroup_id,
62+
old_tbl.segment_id AS old_segment_id,
63+
old_tbl.dataelement_id AS old_dataelement_id,
64+
old_tbl.code_value AS old_code_value,
65+
old_tbl.line_status_std AS old_line_status_std,
66+
old_tbl.line_status_specification AS old_line_status_specification,
67+
old_tbl.line_name AS old_line_name,
68+
new_tbl.edifact_format_version AS new_format_version,
69+
new_tbl.format AS new_format,
70+
new_tbl.segmentgroup_id AS new_segmentgroup_id,
71+
new_tbl.segment_id AS new_segment_id,
72+
new_tbl.dataelement_id AS new_dataelement_id,
73+
new_tbl.code_value AS new_code_value,
74+
new_tbl.line_status_std AS new_line_status_std,
75+
new_tbl.line_status_specification AS new_line_status_specification,
76+
new_tbl.line_name AS new_line_name
77+
FROM version_pairs vp
78+
JOIN mig_hierarchy_materialized new_tbl
79+
ON new_tbl.edifact_format_version = vp.new_format_version
80+
AND new_tbl.format = vp.new_format
81+
JOIN mig_hierarchy_materialized old_tbl
82+
ON old_tbl.edifact_format_version = vp.old_format_version
83+
AND old_tbl.format = vp.old_format
84+
AND old_tbl.id_path = new_tbl.id_path)
85+
86+
-- Modified and unchanged rows
87+
SELECT CASE WHEN changed_columns != '' THEN 'modified' ELSE 'unchanged' END AS diff_status,
88+
NULLIF(changed_columns, '') AS changed_columns,
89+
id_path,
90+
sort_path,
91+
path,
92+
line_type,
93+
old_format_version,
94+
old_format,
95+
old_segmentgroup_id,
96+
old_segment_id,
97+
old_dataelement_id,
98+
old_code_value,
99+
old_line_status_std,
100+
old_line_status_specification,
101+
old_line_name,
102+
new_format_version,
103+
new_format,
104+
new_segmentgroup_id,
105+
new_segment_id,
106+
new_dataelement_id,
107+
new_code_value,
108+
new_line_status_std,
109+
new_line_status_specification,
110+
new_line_name
111+
FROM modified_check
112+
113+
UNION ALL
114+
115+
-- Added rows (exist in new but not in old for the specific version pair)
116+
SELECT 'added' AS diff_status,
117+
NULL AS changed_columns,
118+
new_tbl.id_path,
119+
new_tbl.sort_path,
120+
new_tbl.path,
121+
new_tbl.type AS line_type,
122+
vp.old_format_version AS old_format_version,
123+
vp.old_format AS old_format,
124+
NULL AS old_segmentgroup_id,
125+
NULL AS old_segment_id,
126+
NULL AS old_dataelement_id,
127+
NULL AS old_code_value,
128+
NULL AS old_line_status_std,
129+
NULL AS old_line_status_specification,
130+
NULL AS old_line_name,
131+
new_tbl.edifact_format_version AS new_format_version,
132+
new_tbl.format AS new_format,
133+
new_tbl.segmentgroup_id AS new_segmentgroup_id,
134+
new_tbl.segment_id AS new_segment_id,
135+
new_tbl.dataelement_id AS new_dataelement_id,
136+
new_tbl.code_value AS new_code_value,
137+
new_tbl.line_status_std AS new_line_status_std,
138+
new_tbl.line_status_specification AS new_line_status_specification,
139+
new_tbl.line_name AS new_line_name
140+
FROM version_pairs vp
141+
JOIN mig_hierarchy_materialized new_tbl
142+
ON new_tbl.edifact_format_version = vp.new_format_version
143+
AND new_tbl.format = vp.new_format
144+
WHERE NOT EXISTS (SELECT 1
145+
FROM mig_hierarchy_materialized old_tbl
146+
WHERE old_tbl.edifact_format_version = vp.old_format_version
147+
AND old_tbl.format = vp.old_format
148+
AND old_tbl.id_path = new_tbl.id_path)
149+
150+
UNION ALL
151+
152+
-- Deleted rows (exist in old but not in new for the specific version pair)
153+
SELECT 'deleted' AS diff_status,
154+
NULL AS changed_columns,
155+
old_tbl.id_path,
156+
old_tbl.sort_path,
157+
old_tbl.path,
158+
old_tbl.type AS line_type,
159+
old_tbl.edifact_format_version AS old_format_version,
160+
old_tbl.format AS old_format,
161+
old_tbl.segmentgroup_id AS old_segmentgroup_id,
162+
old_tbl.segment_id AS old_segment_id,
163+
old_tbl.dataelement_id AS old_dataelement_id,
164+
old_tbl.code_value AS old_code_value,
165+
old_tbl.line_status_std AS old_line_status_std,
166+
old_tbl.line_status_specification AS old_line_status_specification,
167+
old_tbl.line_name AS old_line_name,
168+
vp.new_format_version AS new_format_version,
169+
vp.new_format AS new_format,
170+
NULL AS new_segmentgroup_id,
171+
NULL AS new_segment_id,
172+
NULL AS new_dataelement_id,
173+
NULL AS new_code_value,
174+
NULL AS new_line_status_std,
175+
NULL AS new_line_status_specification,
176+
NULL AS new_line_name
177+
FROM version_pairs vp
178+
JOIN mig_hierarchy_materialized old_tbl
179+
ON old_tbl.edifact_format_version = vp.old_format_version
180+
AND old_tbl.format = vp.old_format
181+
WHERE NOT EXISTS (SELECT 1
182+
FROM mig_hierarchy_materialized new_tbl
183+
WHERE new_tbl.edifact_format_version = vp.new_format_version
184+
AND new_tbl.format = vp.new_format
185+
AND new_tbl.id_path = old_tbl.id_path);

0 commit comments

Comments
 (0)