Skip to content

Commit 7076b9e

Browse files
committed
Respect entry morph-type when filtering and sorting (#2202)
* Add filtering on token-aware headwords * Respect SecondaryOrder when sorting * Make the FTS Headword column contain morph tokens and all vernacular writing systems
1 parent 71cb27f commit 7076b9e

24 files changed

Lines changed: 1552 additions & 828 deletions

backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
using FwDataMiniLcmBridge.Api;
2+
using FwDataMiniLcmBridge.LcmUtils;
13
using FwDataMiniLcmBridge.Tests.Fixtures;
4+
using MiniLcm.Models;
25

36
namespace FwDataMiniLcmBridge.Tests.MiniLcmTests;
47

@@ -9,4 +12,42 @@ protected override Task<IMiniLcmApi> NewApi()
912
{
1013
return Task.FromResult<IMiniLcmApi>(fixture.NewProjectApi("sorting-test", "en", "en"));
1114
}
15+
16+
/// <summary>
/// Checks that an entry whose LCM morph type has been cleared is sorted as though it were a stem,
/// i.e. ahead of a bound stem and a suffix that share the same lexeme form.
/// </summary>
[Theory]
[InlineData("aaaa", SortField.Headword)] // FTS
[InlineData("a", SortField.Headword)] // non-FTS
[InlineData("aaaa", SortField.SearchRelevance)] // FTS
[InlineData("a", SortField.SearchRelevance)] // non-FTS
public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortField)
{
    var unknownMorphTypeEntryId = Guid.NewGuid();

    // Listed in the order the search is expected to return them.
    Entry[] expected = [
        new() { Id = unknownMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1
        new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2
        new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6
    ];
    var expectedIds = expected.Select(e => e.Id).ToHashSet();

    // Entries are created in shuffled order, presumably so the asserted order
    // cannot simply mirror creation order.
    foreach (var shuffledEntry in Faker.Faker.Random.Shuffle(expected))
    {
        await Api.CreateEntry(shuffledEntry);
    }

    var fwDataApi = (BaseApi as FwDataMiniLcmApi)!;
    await fwDataApi.Cache.DoUsingNewOrCurrentUOW(
        "Clear morph type",
        "Revert morph type",
        () =>
        {
            // The fwdata API doesn't allow creating entries with MorphType.Other or Unknown,
            // so the morph type is cleared directly on the LCM object.
            var lexEntry = fwDataApi.EntriesRepository.GetObject(unknownMorphTypeEntryId);
            lexEntry.LexemeFormOA.MorphTypeRA = null;
            return ValueTask.CompletedTask;
        });

    // Only the entries created by this test are compared.
    var allMatches = await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync();
    var results = allMatches.Where(e => expectedIds.Contains(e.Id)).ToList();

    results.Should().BeEquivalentTo(expected, options => options.WithStrictOrdering());
}
1253
}

backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -652,7 +652,7 @@ private Entry FromLexEntry(ILexEntry entry)
652652
{
653653
try
654654
{
655-
return new Entry
655+
var result = new Entry
656656
{
657657
Id = entry.Guid,
658658
Note = FromLcmMultiString(entry.Comment),
@@ -670,6 +670,7 @@ private Entry FromLexEntry(ILexEntry entry)
670670
// ILexEntry.PublishIn is a virtual property that inverts DoNotPublishInRC against all publications
671671
PublishIn = entry.PublishIn.Select(FromLcmPossibility).ToList(),
672672
};
673+
return result;
673674
}
674675
catch (Exception e)
675676
{
@@ -725,24 +726,22 @@ private ComplexFormComponent ToEntryReference(ILexEntry component, ILexEntry com
725726
return new ComplexFormComponent
726727
{
727728
ComponentEntryId = component.Guid,
728-
ComponentHeadword = component.LexEntryHeadwordOrUnknown(),
729+
ComponentHeadword = component.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now
729730
ComplexFormEntryId = complexEntry.Guid,
730-
ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(),
731+
ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now
731732
Order = Order(component, complexEntry)
732733
};
733734
}
734735

735-
736-
737736
/// <summary>
/// Builds a <see cref="ComplexFormComponent"/> that links a component sense (and the entry that owns it)
/// to a complex-form entry. Both headwords come from <c>LexEntryHeadwordOrUnknown</c>; the updated lines
/// pass <c>applyMorphTokens: false</c>.
/// </summary>
private ComplexFormComponent ToSenseReference(ILexSense componentSense, ILexEntry complexEntry)
738737
{
739738
return new ComplexFormComponent
740739
{
741740
ComponentEntryId = componentSense.Entry.Guid,
742741
ComponentSenseId = componentSense.Guid,
743-
ComponentHeadword = componentSense.Entry.LexEntryHeadwordOrUnknown(),
742+
ComponentHeadword = componentSense.Entry.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now
744743
ComplexFormEntryId = complexEntry.Guid,
745-
ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(),
744+
ComplexFormHeadword = complexEntry.LexEntryHeadwordOrUnknown(applyMorphTokens: false), // match CRDT for now
746745
Order = Order(componentSense, complexEntry)
747746
};
748747
}
@@ -939,12 +938,13 @@ private IEnumerable<ILexEntry> GetFilteredAndSortedEntries(Func<ILexEntry, bool>
939938
/// <summary>
/// Sorts <paramref name="entries"/> according to <paramref name="order"/>: by rough search relevance
/// when the sort field is <see cref="SortField.SearchRelevance"/>, otherwise by headword.
/// </summary>
private IEnumerable<ILexEntry> ApplySorting(SortOptions order, IEnumerable<ILexEntry> entries, string? query)
940939
{
941940
var sortWs = GetWritingSystemHandle(order.WritingSystem, WritingSystemType.Vernacular);
941+
// SecondaryOrder of the stem morph type; handed to both sort helpers below.
var stemSecondaryOrder = MorphTypeRepository.GetObject(MoMorphTypeTags.kguidMorphStem).SecondaryOrder;
942942
if (order.Field == SortField.SearchRelevance)
943943
{
944-
return entries.ApplyRoughBestMatchOrder(order, sortWs, query);
944+
return entries.ApplyRoughBestMatchOrder(order, sortWs, stemSecondaryOrder, query);
945945
}
946946

947-
return order.ApplyOrder(entries, e => e.LexEntryHeadword(sortWs));
947+
return entries.ApplyHeadwordOrder(order, sortWs, stemSecondaryOrder);
948948
}
949949

950950
public IAsyncEnumerable<Entry> SearchEntries(string query, QueryOptions? options = null)
@@ -956,7 +956,7 @@ public IAsyncEnumerable<Entry> SearchEntries(string query, QueryOptions? options
956956
/// <summary>
/// Builds the entry filter for a search query.
/// </summary>
/// <returns>
/// <c>null</c> when <paramref name="query"/> is null or empty; otherwise a predicate that is true when
/// the headword, the lexeme form, or the gloss of any sense matches the query.
/// </returns>
private Func<ILexEntry, bool>? EntrySearchPredicate(string? query = null)
957957
{
958958
if (string.IsNullOrEmpty(query)) return null;
959-
return entry => entry.CitationForm.SearchValue(query) ||
959+
return entry => entry.SearchHeadWord(query) || // CitationForm.SearchValue would be redundant
960960
entry.LexemeFormOA?.Form.SearchValue(query) is true ||
961961
entry.AllSenses.Any(s => s.Gloss.SearchValue(query));
962962
}

backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace FwDataMiniLcmBridge.Api;
1111

1212
internal static class LcmHelpers
1313
{
14-
internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null)
14+
internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null, bool applyMorphTokens = true)
1515
{
1616
var citationFormTs =
1717
ws.HasValue ? entry.CitationForm.get_String(ws.Value)
@@ -27,15 +27,33 @@ internal static class LcmHelpers
2727
: null;
2828
var lexemeForm = lexemeFormTs?.Text?.Trim(WhitespaceChars);
2929

30-
return lexemeForm;
30+
if (string.IsNullOrEmpty(lexemeForm) || !applyMorphTokens) return lexemeForm;
31+
32+
var morphType = entry.LexemeFormOA?.MorphTypeRA;
33+
var leading = morphType?.Prefix ?? "";
34+
var trailing = morphType?.Postfix ?? "";
35+
return (leading + lexemeForm + trailing).Trim(WhitespaceChars);
3136
}
3237

33-
internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = null)
38+
/// <summary>
/// Returns the entry's headword as produced by <c>LexEntryHeadword</c>, or
/// <c>Entry.UnknownHeadword</c> when that headword is null or empty.
/// </summary>
internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = null, bool applyMorphTokens = true)
3439
{
35-
var headword = entry.LexEntryHeadword(ws);
40+
var headword = entry.LexEntryHeadword(ws, applyMorphTokens);
3641
return string.IsNullOrEmpty(headword) ? Entry.UnknownHeadword : headword;
3742
}
3843

44+
/// <summary>
/// Tests whether the entry's headword in any vernacular writing system contains
/// <paramref name="value"/>, using <c>ContainsDiacriticMatch</c> for the comparison.
/// </summary>
/// <param name="entry">Entry whose headwords are inspected.</param>
/// <param name="value">Text to look for.</param>
/// <returns><c>true</c> on the first matching headword; <c>false</c> when none match.</returns>
internal static bool SearchHeadWord(this ILexEntry entry, string value)
{
    var vernacularWritingSystems = entry.Cache.ServiceLocator.WritingSystems.VernacularWritingSystems;
    foreach (var writingSystem in vernacularWritingSystems)
    {
        // A missing headword and an empty headword are both skipped.
        var headwordText = entry.HeadWordForWs(writingSystem.Handle)?.Text;
        if (!string.IsNullOrEmpty(headwordText) && headwordText.ContainsDiacriticMatch(value))
        {
            return true;
        }
    }

    return false;
}
56+
3957
internal static bool SearchValue(this ITsMultiString multiString, string value)
4058
{
4159
for (var i = 0; i < multiString.StringCount; i++)

backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,31 +6,55 @@ namespace FwDataMiniLcmBridge.Api;
66

77
internal static class Sorting
88
{
9+
/// <summary>
/// Orders entries by headword (without morph tokens), then by the SecondaryOrder of the entry's
/// primary morph type, then by entry GUID. All three keys follow the direction given by
/// <c>order.Ascending</c>.
/// </summary>
/// <param name="entries">Entries to sort.</param>
/// <param name="order">Sort options; only <c>Ascending</c> is read here.</param>
/// <param name="sortWsHandle">Handle of the writing system used to read the headword.</param>
/// <param name="stemSecondaryOrder">SecondaryOrder used for entries that have no primary morph type.</param>
public static IEnumerable<ILexEntry> ApplyHeadwordOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder)
{
    // NOTE(review): a HomographNumber tie-breaker (between morph type and GUID) was left
    // commented out in the original — confirm whether it should be enabled.
    return order.Ascending
        ? entries
            .OrderBy(entry => entry.LexEntryHeadword(sortWsHandle, applyMorphTokens: false))
            .ThenBy(entry => entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
            .ThenBy(entry => entry.Id.Guid)
        : entries
            .OrderByDescending(entry => entry.LexEntryHeadword(sortWsHandle, applyMorphTokens: false))
            .ThenByDescending(entry => entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
            .ThenByDescending(entry => entry.Id.Guid);
}
28+
929
/// <summary>
1030
/// Rough emulation of FTS search relevance. Headword matches come first, preferring
1131
/// prefix matches (e.g. when searching "tan" then "tanan" is before "matan"), then shorter, then alphabetical.
1232
/// See also: EntrySearchService.FilterAndRank for the FTS-based equivalent in LcmCrdt.
1333
/// </summary>
14-
public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, string? query = null)
34+
public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder, string? query = null)
1535
{
16-
var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle)));
36+
var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)));
1737
if (order.Ascending)
1838
{
1939
return projected
20-
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
21-
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
40+
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
41+
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
2242
.ThenBy(x => x.Headword?.Length ?? 0)
2343
.ThenBy(x => x.Headword)
44+
.ThenBy(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
45+
// .ThenBy(x => x.Entry.HomographNumber)
2446
.ThenBy(x => x.Entry.Id.Guid)
2547
.Select(x => x.Entry);
2648
}
2749
else
2850
{
2951
return projected
30-
.OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
31-
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
52+
.OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
53+
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
3254
.ThenByDescending(x => x.Headword?.Length ?? 0)
3355
.ThenByDescending(x => x.Headword)
56+
.ThenByDescending(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
57+
// .ThenByDescending(x => x.Entry.HomographNumber)
3458
.ThenByDescending(x => x.Entry.Id.Guid)
3559
.Select(x => x.Entry);
3660
}

backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using System.Diagnostics.CodeAnalysis;
12
using MiniLcm.Models;
23
using SIL.LCModel;
34

@@ -8,11 +9,13 @@ public class UpdateMorphTypeProxy : MorphType
89
private readonly IMoMorphType _lcmMorphType;
910
private readonly FwDataMiniLcmApi _lexboxLcmApi;
1011

12+
/// <summary>
/// Creates a proxy around an LCM morph type, copying its GUID into <c>Id</c> and its
/// converted morph-type kind into <c>Kind</c>.
/// </summary>
/// <param name="lcmMorphType">LCM morph type wrapped by this proxy.</param>
/// <param name="lexboxLcmApi">API instance stored on the proxy.</param>
[SetsRequiredMembers]
public UpdateMorphTypeProxy(IMoMorphType lcmMorphType, FwDataMiniLcmApi lexboxLcmApi)
{
    (_lcmMorphType, _lexboxLcmApi) = (lcmMorphType, lexboxLcmApi);

    Id = lcmMorphType.Guid;
    Kind = LcmHelpers.FromLcmMorphType(lcmMorphType);
}
1720

1821
public override MultiString Name

0 commit comments

Comments
 (0)