Skip to content

Commit c6d0b25

Browse files
hf-kklein and claude authored
feat: add Unicode-aware expression indexes for German umlaut search (#261)
SQLite's LOWER() only handles ASCII — LOWER('Ä') returns 'Ä', not 'ä'.

Add expression indexes using REPLACE(REPLACE(REPLACE(LOWER(...), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü') for all columns used in ahb-tabellen search. These indexes enable the LIKE optimization for prefix queries (startsWith) when the search code uses the matching REPLACE expression. Substring queries (%pattern%) always do full scans regardless of indexes.

Related: Hochfrequenz/ahb-tabellen#790

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 38e2888 commit c6d0b25

1 file changed

Lines changed: 8 additions & 0 deletions

File tree

src/fundamend/sqlmodels/materialize_ahb_view.sql

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -636,14 +636,17 @@ CREATE INDEX idx_hierarchy_gueltig_von ON ahb_hierarchy_materialized (gueltig_vo
636636
-- Index on the raw `gueltig_bis` column (German: "valid until"); no expression applied.
CREATE INDEX idx_hierarchy_gueltig_bis ON ahb_hierarchy_materialized (gueltig_bis);
637637
-- Index on the raw `beschreibung` column (German: "description"); serves predicates that reference the column as stored.
CREATE INDEX idx_hierarchy_beschreibung ON ahb_hierarchy_materialized (beschreibung);
638638
-- Expression index on lower(beschreibung). SQLite's built-in lower() folds ASCII letters only
-- (unless the ICU extension is loaded), so 'Ä'/'Ö'/'Ü' stay uppercase in the indexed value.
-- The planner only considers this index when the query uses the same lower(beschreibung) expression.
CREATE INDEX idx_hierarchy_beschreibung_lower ON ahb_hierarchy_materialized (lower(beschreibung));
639+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(beschreibung), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_hierarchy_beschreibung_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(beschreibung), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
639640
-- Index on the raw `kommunikationsrichtungen` column (German: "communication directions"); no expression applied.
CREATE INDEX idx_hierarchy_kommunikationsrichtungen ON ahb_hierarchy_materialized (kommunikationsrichtungen);
640641
-- Index on the raw `edifact_format_version` column; no expression applied.
CREATE INDEX idx_hierarchy_edifact_format_version ON ahb_hierarchy_materialized (edifact_format_version);
641642
-- Index on the raw `segmentgroup_id` column; serves predicates that reference the column as stored.
CREATE INDEX idx_hierarchy_segmentgroup_id ON ahb_hierarchy_materialized (segmentgroup_id);
642643
-- Expression index on lower(segmentgroup_id). Built-in lower() folds ASCII letters only (unless ICU is loaded);
-- the planner only considers this index when the query uses the same lower(segmentgroup_id) expression.
CREATE INDEX idx_hierarchy_segmentgroup_id_lower ON ahb_hierarchy_materialized (lower(segmentgroup_id));
644+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(segmentgroup_id), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_hierarchy_segmentgroup_id_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(segmentgroup_id), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
643645
-- Index on the raw `segmentgroup_name` column; no expression applied.
CREATE INDEX idx_hierarchy_segmentgroup_name ON ahb_hierarchy_materialized (segmentgroup_name);
644646
-- Index on the raw `segmentgroup_position` column; no expression applied.
CREATE INDEX idx_hierarchy_segmentgroup_position ON ahb_hierarchy_materialized (segmentgroup_position);
645647
-- Index on the raw `segment_id` column; serves predicates that reference the column as stored.
CREATE INDEX idx_hierarchy_segment_id ON ahb_hierarchy_materialized (segment_id);
646648
-- Expression index on lower(segment_id). Built-in lower() folds ASCII letters only (unless ICU is loaded);
-- the planner only considers this index when the query uses the same lower(segment_id) expression.
CREATE INDEX idx_hierarchy_segment_id_lower ON ahb_hierarchy_materialized (lower(segment_id));
649+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(segment_id), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_hierarchy_segment_id_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(segment_id), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
647650
-- Index on the raw `segment_name` column; no expression applied.
CREATE INDEX idx_hierarchy_segment_name ON ahb_hierarchy_materialized (segment_name);
648651
-- Index on the raw `segment_number` column; no expression applied.
CREATE INDEX idx_hierarchy_segment_number ON ahb_hierarchy_materialized (segment_number);
649652
-- Index on the raw `segment_position` column; no expression applied.
CREATE INDEX idx_hierarchy_segment_position ON ahb_hierarchy_materialized (segment_position);
@@ -652,15 +655,18 @@ CREATE INDEX idx_hierarchy_dataelementgroup_name ON ahb_hierarchy_materialized (
652655
-- Index on the raw `dataelementgroup_position` column; no expression applied.
CREATE INDEX idx_hierarchy_dataelementgroup_position ON ahb_hierarchy_materialized (dataelementgroup_position);
653656
-- Index on the raw `dataelement_id` column; serves predicates that reference the column as stored.
CREATE INDEX idx_hierarchy_dataelement_id ON ahb_hierarchy_materialized (dataelement_id);
654657
-- Expression index on lower(dataelement_id). Built-in lower() folds ASCII letters only (unless ICU is loaded);
-- the planner only considers this index when the query uses the same lower(dataelement_id) expression.
CREATE INDEX idx_hierarchy_dataelement_id_lower ON ahb_hierarchy_materialized (lower(dataelement_id));
658+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(dataelement_id), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_hierarchy_dataelement_id_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(dataelement_id), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
655659
-- Index on the raw `dataelement_name` column; no expression applied.
CREATE INDEX idx_hierarchy_dataelement_name ON ahb_hierarchy_materialized (dataelement_name);
656660
-- Index on the raw `dataelement_position` column; no expression applied.
CREATE INDEX idx_hierarchy_dataelement_position ON ahb_hierarchy_materialized (dataelement_position);
657661
-- Index on the raw `dataelement_ahb_status` column; no expression applied.
CREATE INDEX idx_hierarchy_dataelement_ahb_status ON ahb_hierarchy_materialized (dataelement_ahb_status);
658662
-- Index on the raw `code_id` column; no expression applied.
CREATE INDEX idx_hierarchy_code_id ON ahb_hierarchy_materialized (code_id);
659663
-- Index on the raw `code_name` column; no expression applied.
CREATE INDEX idx_hierarchy_code_name ON ahb_hierarchy_materialized (code_name);
660664
-- Index on the raw `code_description` column; serves predicates that reference the column as stored.
CREATE INDEX idx_hierarchy_code_description ON ahb_hierarchy_materialized (code_description);
661665
-- Expression index on lower(code_description). SQLite's built-in lower() folds ASCII letters only
-- (unless the ICU extension is loaded), so 'Ä'/'Ö'/'Ü' stay uppercase in the indexed value.
-- The planner only considers this index when the query uses the same lower(code_description) expression.
CREATE INDEX idx_hierarchy_code_description_lower ON ahb_hierarchy_materialized (lower(code_description));
666+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(code_description), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_hierarchy_code_description_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(code_description), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
662667
-- Index on the raw `code_value` column; serves predicates that reference the column as stored.
CREATE INDEX idx_hierarchy_code_value ON ahb_hierarchy_materialized (code_value);
663668
-- Expression index on lower(code_value). Built-in lower() folds ASCII letters only (unless ICU is loaded);
-- the planner only considers this index when the query uses the same lower(code_value) expression.
CREATE INDEX idx_hierarchy_code_value_lower ON ahb_hierarchy_materialized (lower(code_value));
669+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(code_value), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_hierarchy_code_value_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(code_value), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
664670
-- Index on the raw `code_ahb_status` column; no expression applied.
CREATE INDEX idx_hierarchy_code_ahb_status ON ahb_hierarchy_materialized (code_ahb_status);
665671
-- Index on the raw `code_position` column; no expression applied.
CREATE INDEX idx_hierarchy_code_position ON ahb_hierarchy_materialized (code_position);
666672
-- Index on the raw `path` column; no expression applied.
CREATE INDEX idx_hierarchy_path ON ahb_hierarchy_materialized (path);
@@ -674,8 +680,10 @@ CREATE INDEX idx_ahb_tabellen_filter2 ON ahb_hierarchy_materialized (type) WHERE
674680
-- indexes for computed columns for v_ahbtabellen
675681
-- Index on the raw `line_ahb_status` column (listed under the computed columns for v_ahbtabellen); no expression applied.
CREATE INDEX idx_line_ahb_status ON ahb_hierarchy_materialized (line_ahb_status);
676682
-- Expression index on lower(line_ahb_status). Built-in lower() folds ASCII letters only (unless ICU is loaded);
-- the planner only considers this index when the query uses the same lower(line_ahb_status) expression.
CREATE INDEX idx_line_ahb_status_lower ON ahb_hierarchy_materialized (lower(line_ahb_status));
683+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(line_ahb_status), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_line_ahb_status_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(line_ahb_status), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
677684
-- Index on the raw `line_name` column (listed under the computed columns for v_ahbtabellen); no expression applied.
CREATE INDEX idx_line_name ON ahb_hierarchy_materialized (line_name);
678685
-- Expression index on lower(line_name). SQLite's built-in lower() folds ASCII letters only
-- (unless the ICU extension is loaded), so 'Ä'/'Ö'/'Ü' stay uppercase in the indexed value.
-- The planner only considers this index when the query uses the same lower(line_name) expression.
CREATE INDEX idx_line_name_lower ON ahb_hierarchy_materialized (lower(line_name));
686+
-- Umlaut-aware lowercase expression index: LOWER() folds the ASCII letters, the nested REPLACE calls then map 'Ä'->'ä', 'Ö'->'ö', 'Ü'->'ü'.
-- SQLite only considers an expression index when the query contains the same expression as written here,
-- so the search code must use this exact REPLACE(REPLACE(REPLACE(LOWER(line_name), ...))) chain.
-- NOTE(review): for LIKE 'prefix%' queries the LIKE optimization also has a collation requirement
-- (NOCASE index unless PRAGMA case_sensitive_like is ON) — confirm index usage with EXPLAIN QUERY PLAN.
CREATE INDEX idx_line_name_unicode_lower ON ahb_hierarchy_materialized (REPLACE(REPLACE(REPLACE(LOWER(line_name), 'Ä', 'ä'), 'Ö', 'ö'), 'Ü', 'ü'));
679687
-- Composite index on (sort_path, pruefidentifikator, edifact_format_version), in that column order.
-- Column order matters: the index is most useful to predicates/ORDER BY that start with sort_path (leftmost column first).
-- NOTE(review): the name suggests "sort within one AHB"; if queries filter by pruefidentifikator first, verify via EXPLAIN QUERY PLAN that this order is actually used.
CREATE INDEX idx_hierarchy_sort_path_per_ahb ON ahb_hierarchy_materialized (sort_path, pruefidentifikator, edifact_format_version);
680688

681689
-- Fallback: append occurrence counter '#N' to any id_paths that are still not unique after qualifier injection.

0 commit comments

Comments
 (0)