Skip to content

Commit 8ed192f

Browse files
committed
Seed canonical morph types and regenerate search index (#2219)
* Seed canonical morph-types into CRDT projects
  - Add CanonicalMorphTypes with all 19 morph-type definitions (GUIDs from LibLCM)
  - Seed morph-types for new projects via PreDefinedData.PredefinedMorphTypes
  - Seed morph-types for existing projects in MigrateDb (before FTS refresh)
  - Add EF migration to clear FTS table so headwords are rebuilt with morph tokens
  - Patch legacy snapshots (empty MorphTypes) in sync layer to prevent duplicates
* Stop creating morph-types in tests. They're now prepopulated
* Stop printing verify diff content. It's too much.
* Seed morph types before API testing
* Add descriptions to canonical morph types
* Sync morph-types when importing, because they already exist in CRDT
* Verify our canonical morph-types match new fwdata projects
* Fix non-FTS relevance order with morph-tokens in query
1 parent 5abe103 commit 8ed192f

26 files changed

Lines changed: 1340 additions & 156 deletions
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
using FwDataMiniLcmBridge.Api;
2+
using FwDataMiniLcmBridge.LcmUtils;
3+
using FwDataMiniLcmBridge.Tests.Fixtures;
4+
using Microsoft.Extensions.DependencyInjection;
5+
using Microsoft.Extensions.Options;
6+
using MiniLcm.Models;
7+
8+
namespace FwDataMiniLcmBridge.Tests;
9+
10+
public class CanonicalMorphTypeTests : IDisposable
11+
{
12+
private readonly ServiceProvider _serviceProvider;
13+
private readonly FwDataMiniLcmApi _api;
14+
private readonly FwDataProject _project;
15+
16+
public CanonicalMorphTypeTests()
17+
{
18+
var services = new ServiceCollection()
19+
.AddTestFwDataBridge(mockProjectLoader: false)
20+
.PostConfigure<FwDataBridgeConfig>(config =>
21+
config.TemplatesFolder = Path.GetFullPath("Templates"))
22+
.BuildServiceProvider();
23+
_serviceProvider = services;
24+
25+
var config = services.GetRequiredService<IOptions<FwDataBridgeConfig>>();
26+
Directory.CreateDirectory(config.Value.ProjectsFolder);
27+
var projectName = $"canonical-morph-types-test_{Guid.NewGuid()}";
28+
_project = new FwDataProject(projectName, config.Value.ProjectsFolder);
29+
var projectLoader = services.GetRequiredService<IProjectLoader>();
30+
projectLoader.NewProject(_project, "en", "en");
31+
32+
var fwDataFactory = services.GetRequiredService<FwDataFactory>();
33+
_api = fwDataFactory.GetFwDataMiniLcmApi(_project, false);
34+
}
35+
36+
public void Dispose()
37+
{
38+
_api.Dispose();
39+
_serviceProvider.Dispose();
40+
if (Directory.Exists(_project.ProjectFolder))
41+
Directory.Delete(_project.ProjectFolder, true);
42+
}
43+
44+
[Fact]
45+
public async Task CanonicalMorphTypes_MatchNewLangProjMorphTypes()
46+
{
47+
var libLcmMorphTypes = await _api.GetMorphTypes().ToArrayAsync();
48+
libLcmMorphTypes.Should().NotBeEmpty();
49+
CanonicalMorphTypes.All.Values.Should().BeEquivalentTo(libLcmMorphTypes);
50+
}
51+
}

backend/FwLite/FwDataMiniLcmBridge.Tests/FwDataMiniLcmBridge.Tests.csproj

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,9 @@
3535
</ItemGroup>
3636
<ItemGroup>
3737
<Folder Include="TestData\" />
38+
<PackageReference Include="SIL.LCModel" GeneratePathProperty="true" />
39+
<Content Include="$(PkgSIL_LCModel)/contentFiles/Templates/*.*"
40+
Link="Templates/%(Filename)%(Extension)"
41+
CopyToOutputDirectory="PreserveNewest" />
3842
</ItemGroup>
39-
</Project>
43+
</Project>

backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortFiel
2222
{
2323
var unknownMorphTypeEntryId = Guid.NewGuid();
2424
Entry[] expected = [
25-
new() { Id = unknownMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1
26-
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2
27-
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6
25+
new() { Id = unknownMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 0
26+
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 10
27+
new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 70
2828
];
2929

3030
var ids = expected.Select(e => e.Id).ToHashSet();

backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,16 @@ public static IEnumerable<ILexEntry> ApplyHeadwordOrder(this IEnumerable<ILexEnt
3333
/// </summary>
3434
public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<ILexEntry> entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder, string? query = null)
3535
{
36-
var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)));
36+
var projected = entries.Select(e => (
37+
Entry: e,
38+
Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false),
39+
HeadwordWithTokens: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: true)
40+
));
3741
if (order.Ascending)
3842
{
3943
return projected
40-
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
41-
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
44+
.OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.StartsWithDiacriticMatch(query!) ?? false))
45+
.ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.ContainsDiacriticMatch(query!) ?? false))
4246
.ThenBy(x => x.Headword?.Length ?? 0)
4347
.ThenBy(x => x.Headword)
4448
.ThenBy(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)
@@ -49,8 +53,8 @@ public static IEnumerable<ILexEntry> ApplyRoughBestMatchOrder(this IEnumerable<I
4953
else
5054
{
5155
return projected
52-
.OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false))
53-
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false))
56+
.OrderBy(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.StartsWithDiacriticMatch(query!) ?? false))
57+
.ThenBy(x => !string.IsNullOrEmpty(query) && (x.HeadwordWithTokens?.ContainsDiacriticMatch(query!) ?? false))
5458
.ThenByDescending(x => x.Headword?.Length ?? 0)
5559
.ThenByDescending(x => x.Headword)
5660
.ThenByDescending(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder)

backend/FwLite/FwLiteProjectSync.Tests/FluentAssertGlobalConfig.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using System.Runtime.CompilerServices;
12
using FluentAssertions.Extensibility;
23
using FwLiteProjectSync.Tests;
34

@@ -7,6 +8,12 @@ namespace FwLiteProjectSync.Tests;
78

89
public static class FluentAssertGlobalConfig
910
{
11+
[ModuleInitializer]
12+
internal static void InitVerify()
13+
{
14+
VerifierSettings.OmitContentFromException();
15+
}
16+
1017
public static void Initialize()
1118
{
1219
MiniLcm.Tests.FluentAssertGlobalConfig.Initialize();

backend/FwLite/FwLiteProjectSync.Tests/Import/ResumableTests.cs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,7 @@ public async Task ImportProject_IsResumable_AcrossRandomFailures()
3838
}).ToList();
3939
var expectedPartsOfSpeech = Enumerable.Range(1, 10)
4040
.Select(i => new PartOfSpeech { Id = Guid.NewGuid(), Name = { ["en"] = $"pos{i}" } }).ToList();
41-
var expectedMorphTypes = Enum.GetValues<MorphTypeKind>()
42-
.Select(typ => new MorphType()
43-
{
44-
Id = Guid.NewGuid(),
45-
Name = new() { ["en"] = $"Test Morph Type {(int)typ} {typ}" },
46-
Abbreviation = new() { ["en"] = $"Tst MrphTyp{(int)typ}" },
47-
Description = new() { { "en", new RichString($"test desc for {typ}") } },
48-
Prefix = null,
49-
Postfix = null,
50-
Kind = typ,
51-
SecondaryOrder = 0
52-
}).ToList();
41+
var expectedMorphTypes = CanonicalMorphTypes.All.Values;
5342

5443
var mockFrom = new Mock<IMiniLcmApi>();
5544
IMiniLcmApi mockTo = new UnreliableApi(
@@ -132,7 +121,6 @@ public async Task ImportProject_IsResumable_AcrossRandomFailures()
132121
createdEntries.Select(e => e.LexemeForm["en"]).Should().BeEquivalentTo(expectedEntries.Select(e => e.LexemeForm["en"]));
133122
createdMorphTypes.Select(e => e.Name["en"]).Should().BeEquivalentTo(expectedMorphTypes.Select(e => e.Name["en"]));
134123
createdMorphTypes.Select(e => e.Kind).Should().BeEquivalentTo(expectedMorphTypes.Select(e => e.Kind));
135-
136124
}
137125

138126

backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,18 @@ private async Task<SyncResult> SyncOrImportInternal(IMiniLcmApi crdtApi, IMiniLc
7676
{
7777
// Repair any missing translation IDs before doing the full sync, so the sync doesn't have to deal with them
7878
var syncedIdCount = await CrdtRepairs.SyncMissingTranslationIds(projectSnapshot.Entries, fwdata, crdt, dryRun);
79+
80+
// Patch legacy snapshots that were created before morph-type support.
81+
// After seeding, the CRDT has morph-types but the snapshot still has [].
82+
// Without this patch, the diff would see all morph-types as "new" and try to re-add them.
83+
if (projectSnapshot.MorphTypes.Length == 0)
84+
{
85+
var currentCrdtMorphTypes = await crdt.GetMorphTypes().ToArrayAsync();
86+
if (currentCrdtMorphTypes.Length > 0)
87+
{
88+
projectSnapshot = projectSnapshot with { MorphTypes = currentCrdtMorphTypes };
89+
}
90+
}
7991
}
8092

8193
var syncResult = projectSnapshot is null

backend/FwLite/FwLiteProjectSync/MiniLcmImport.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using MiniLcm;
99
using MiniLcm.Models;
1010
using MiniLcm.Project;
11+
using MiniLcm.SyncHelpers;
1112

1213
namespace FwLiteProjectSync;
1314

@@ -70,11 +71,10 @@ public async Task ImportProject(IMiniLcmApi importTo, IMiniLcmApi importFrom, in
7071
logger.LogInformation("Imported complex form type {Id}", complexFormType.Id);
7172
}
7273

73-
await foreach (var morphType in importFrom.GetMorphTypes())
74-
{
75-
await importTo.CreateMorphType(morphType);
76-
logger.LogInformation("Imported morph type {Id} ({typ})", morphType.Id, morphType.Kind);
77-
}
74+
// Morph types are created automatically for CRDT projects, so we update them instead of creating them
75+
var importFromMorphTypes = await importFrom.GetMorphTypes().ToArrayAsync();
76+
var existingMorphTypes = await importTo.GetMorphTypes().ToArrayAsync();
77+
await MorphTypeSync.Sync(existingMorphTypes, importFromMorphTypes, importTo);
7878

7979
logger.LogInformation("Importing semantic domains");
8080
await importTo.BulkImportSemanticDomains(importFrom.GetSemanticDomains());

backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ public FilteringTests()
1212
_entries =
1313
[
1414
new Entry { LexemeForm = { { "en", "123" } }, },
15-
new Entry { LexemeForm = { { "en", "456" } }, }
15+
new Entry { LexemeForm = { { "en", "456" } }, },
1616
];
17-
_morphTypes = new MorphType[] { new() { Id = Guid.NewGuid(), Kind = MorphTypeKind.Stem, Name = { ["en"] = "Stem" }, SecondaryOrder = 1 } }.AsQueryable();
17+
_morphTypes = CanonicalMorphTypes.All.Values.ToArray().AsQueryable();
1818
}
1919

2020
[Theory]

backend/FwLite/LcmCrdt.Tests/Data/MigrationTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ public class MigrationTests : IAsyncLifetime
2222
internal static void Init()
2323
{
2424
VerifySystemJson.Initialize();
25+
VerifierSettings.OmitContentFromException();
2526
}
2627

2728
public Task InitializeAsync()

0 commit comments

Comments
 (0)