Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -128,5 +128,6 @@
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageVersion>
<PackageVersion Include="xunit.v3" Version="2.0.2" />
<PackageVersion Include="Testcontainers.Elasticsearch" Version="4.6.0" />
</ItemGroup>
</Project>
1 change: 1 addition & 0 deletions docs-builder.slnx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
<Project Path="tests-integration/Elastic.Documentation.Api.IntegrationTests/Elastic.Documentation.Api.IntegrationTests.csproj" />
<Project Path="tests-integration/Search.IntegrationTests/Search.IntegrationTests.csproj" />
<Project Path="tests-integration/Mcp.Remote.IntegrationTests/Mcp.Remote.IntegrationTests.csproj" />
<Project Path="tests-integration/Elastic.ContentDateEnrichment.IntegrationTests/Elastic.ContentDateEnrichment.IntegrationTests.csproj" />
</Folder>
<Folder Name="/tests/">
<File Path="tests/Directory.Build.props" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,23 @@ public async Task SyncLookupIndexAsync(string lexicalAlias, Cancel ct)
logger.LogInformation("Content date lookup sync complete");
}

/// <summary>
/// Post-indexing step that runs _update_by_query with the enrichment pipeline
/// against <paramref name="indexAlias"/> so content_last_updated is resolved for every document.
/// </summary>
/// <remarks>
/// HashedBulkUpdate indexes through bulk update actions, which skip ingest pipelines,
/// so the pipeline never fires during normal indexing. Running it here lets the pipeline
/// compare each document's content_hash against the lookup from the previous run and
/// either preserve the old date or stamp a new one.
/// </remarks>
/// <param name="indexAlias">Index or alias whose documents are passed through the pipeline.</param>
/// <param name="ct">Cancellation handle forwarded to the update-by-query call.</param>
public async Task ResolveContentDatesAsync(string indexAlias, Cancel ct)
{
    logger.LogInformation("Resolving content dates in {Index} via pipeline {Pipeline}", indexAlias, PipelineName);

    // Empty request body: no query is supplied, in line with the intent to process all documents.
    var requestBody = PostData.Empty;
    await operations.UpdateByQueryAsync(indexAlias, requestBody, PipelineName, ct);

    logger.LogInformation("Content date resolution complete for {Index}", indexAlias);
}

/// <summary>
/// Builds a unique staging name of the form
/// <c>{_lookupAlias}-{UTC timestamp as yyyyMMddHHmmss}-{first 8 hex chars of a new GUID}</c>.
/// </summary>
/// <remarks>
/// The timestamp is formatted with the invariant culture so the generated name does not
/// depend on the current thread culture (for example, cultures whose default calendar is
/// not Gregorian would otherwise produce a different year component).
/// </remarks>
/// <returns>The generated staging name.</returns>
private string GenerateStagingName()
{
    var timestamp = DateTime.UtcNow.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

    // "N" renders the GUID as 32 hex digits without dashes; the first 8 serve as a short random suffix.
    var suffix = Guid.NewGuid().ToString("N")[..8];

    return $"{_lookupAlias}-{timestamp}-{suffix}";
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ public partial class ElasticsearchMarkdownExporter : IMarkdownExporter, IDisposa
// Content date tracking - enrich policy + pipeline for content_last_updated
private readonly ContentDateEnrichment _contentDateEnrichment;

// Read aliases resolved during StartAsync, used for post-indexing operations.
// Both remain string.Empty until StartAsync assigns them from the orchestrator context;
// StopAsync passes them to the content date enrichment steps.
private string _lexicalReadAlias = string.Empty;
private string _semanticReadAlias = string.Empty;

// Per-channel running totals for progress logging
private int _primaryIndexed;
private int _secondaryIndexed;
Expand Down Expand Up @@ -208,6 +212,8 @@ private void ConfigureChannelOptions(string label, IngestChannelOptions<Document
public async ValueTask StartAsync(Cancel ctx = default)
{
var orchestratorContext = await _orchestrator.StartAsync(BootstrapMethod.Failure, ctx);
_lexicalReadAlias = orchestratorContext.PrimaryReadAlias;
_semanticReadAlias = orchestratorContext.SecondaryReadAlias;

_logger.LogInformation(
"Orchestrator started — strategy: {Strategy}, primary: {PrimaryAlias}, secondary: {SecondaryAlias}",
Expand All @@ -218,7 +224,14 @@ public async ValueTask StartAsync(Cancel ctx = default)
public async ValueTask StopAsync(Cancel ctx = default)
{
    // Finish the orchestrated indexing run first; the steps below operate on its result.
    _ = await _orchestrator.CompleteAsync(null, ctx);

    // HashedBulkUpdate uses bulk update actions, which skip ingest pipelines, so
    // content_last_updated still has to be resolved for documents the pipeline never saw.
    // The read aliases (-latest) are targeted instead of WriteTarget, because WriteTarget
    // is removed after CompleteAsync. Lexical is processed before semantic.
    var readAliases = new[] { _lexicalReadAlias, _semanticReadAlias };
    foreach (var readAlias in readAliases)
    {
        await _contentDateEnrichment.ResolveContentDatesAsync(readAlias, ctx);
    }

    // Sync the lookup index from the lexical read alias once dates have been resolved.
    await _contentDateEnrichment.SyncLookupIndexAsync(_lexicalReadAlias, ctx);
}

private async Task PostCompleteAsync(OrchestratorContext<DocumentationDocument> context, Cancel ctx)
Expand Down
Loading
Loading