From fa084e8e67b32f5b76b4cb798749cf4981f456f8 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Mon, 20 Apr 2026 16:15:58 -0300 Subject: [PATCH 1/8] Introduce bundle upload support and data scrubbing primitives. --- .../Bundling/LinkAllowlistSanitizer.cs | 140 +++++++- .../Uploading/ChangelogUploadService.cs | 106 +++++- .../Changelogs/LinkAllowlistSanitizerTests.cs | 317 ++++++++++++++++++ .../Uploading/ChangelogUploadServiceTests.cs | 91 ++++- 4 files changed, 631 insertions(+), 23 deletions(-) diff --git a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs index 7d72d16ca3..8bdb3aa4d2 100644 --- a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs +++ b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs @@ -2,6 +2,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Text.RegularExpressions; using Elastic.Documentation.Configuration.Assembler; using Elastic.Documentation.Diagnostics; using Elastic.Documentation.ReleaseNotes; @@ -10,10 +11,16 @@ namespace Elastic.Changelog.Bundling; /// /// Rewrites PR/issue references that are not in bundle.link_allow_repos to # PRIVATE: sentinels for bundle YAML output. +/// Also provides scrubbing for individual changelog entries and free-text fields. /// -public static class LinkAllowlistSanitizer +public static partial class LinkAllowlistSanitizer { private const string SentinelPrefix = "# PRIVATE:"; + [GeneratedRegex(@"https?://github\.com/(?[A-Za-z0-9_.-]+)/(?[A-Za-z0-9_.-]+)/(?:pull|issues)/\d+", RegexOptions.None)] + private static partial Regex GitHubUrlRegex(); + + [GeneratedRegex(@"(?[A-Za-z0-9_.-]+)/(?[A-Za-z0-9_.-]+)#\d+", RegexOptions.None)] + private static partial Regex ShortFormRefRegex(); /// /// Applies the allowlist to PR/issue strings. References whose resolved owner/repo is not in @@ -106,6 +113,137 @@ public static void EmitAssemblerDiagnostics( } } + /// + /// Builds a list of allowed owner/repo strings from an + /// by collecting every reference repository that is not marked private: true or skip: true. + /// Bare keys (without a slash) are assumed to be under the elastic organization. + /// + public static IReadOnlyList BuildAllowReposFromAssembler(AssemblyConfiguration assembly) + { + var result = new List(); + foreach (var kvp in assembly.ReferenceRepositories) + { + if (kvp.Value.Private || kvp.Value.Skip) + continue; + + var key = kvp.Key; + if (!key.Contains('/')) + key = $"elastic/{key}"; + + result.Add(key); + } + + return result; + } + + /// + /// Applies the allowlist to a single changelog entry. + /// Scrubs Prs, Issues, Description, Impact, and Action fields. + /// + public static bool TryApplyChangelogEntry( + IDiagnosticsCollector collector, + BundledEntry entry, + IReadOnlyList allowRepos, + string defaultOwner, + string? defaultRepo, + out BundledEntry sanitized, + out bool changesApplied) + { + sanitized = entry; + changesApplied = false; + + var allow = BuildAllowSet(allowRepos); + var ownerDefault = string.IsNullOrWhiteSpace(defaultOwner) ? "elastic" : defaultOwner; + var anyRewritten = false; + + var prs = ApplyToReferenceList( + collector, + entry.Prs, + ownerDefault, + defaultRepo, + allow, + "PR", + ref anyRewritten); + if (prs == null && entry.Prs is not null) + return false; + + var issues = ApplyToReferenceList( + collector, + entry.Issues, + ownerDefault, + defaultRepo, + allow, + "issue", + ref anyRewritten); + if (issues == null && entry.Issues is not null) + return false; + + var description = ScrubText(entry.Description, allow, ref anyRewritten); + var impact = ScrubText(entry.Impact, allow, ref anyRewritten); + var action = ScrubText(entry.Action, allow, ref anyRewritten); + + sanitized = entry with + { + Prs = prs, + Issues = issues, + Description = description, + Impact = impact, + Action = action + }; + changesApplied = anyRewritten; + return true; + } + + /// + /// Replaces GitHub references in free text that point to repositories not in + /// . Handles full URLs and owner/repo#N short forms. + /// + internal static string? ScrubText(string? input, HashSet allow, ref bool changed) + { + if (string.IsNullOrWhiteSpace(input)) + return input; + + var anyReplaced = false; + + var result = GitHubUrlRegex().Replace(input, match => + { + var owner = match.Groups["owner"].Value; + var repo = match.Groups["repo"].Value; + var fullName = $"{owner}/{repo}"; + if (allow.Contains(fullName)) + return match.Value; + + anyReplaced = true; + return string.Empty; + }); + + result = ShortFormRefRegex().Replace(result, match => + { + var owner = match.Groups["owner"].Value; + var repo = match.Groups["repo"].Value; + var fullName = $"{owner}/{repo}"; + if (allow.Contains(fullName)) + return match.Value; + + anyReplaced = true; + return string.Empty; + }); + + if (anyReplaced) + changed = true; + + return result; + } + + /// + /// Overload that accepts a list of allowed repos (converts to the internal ). + /// + internal static string? ScrubText(string? input, IReadOnlyList allowRepos, ref bool changed) + { + var allow = BuildAllowSet(allowRepos); + return ScrubText(input, allow, ref changed); + } + private static HashSet BuildAllowSet(IReadOnlyList allowRepos) { var allow = new HashSet(StringComparer.OrdinalIgnoreCase); diff --git a/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs b/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs index 6fd4859e8f..38da7bc566 100644 --- a/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs +++ b/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs @@ -58,30 +58,30 @@ public async Task Upload(IDiagnosticsCollector collector, ChangelogUploadA return true; } - if (args.ArtifactType == ArtifactType.Bundle) - { - _logger.LogWarning("Bundle artifact upload is not yet implemented; skipping"); - return true; - } + var directory = args.ArtifactType == ArtifactType.Bundle + ? await ResolveBundleDirectory(collector, args, ctx) + : await ResolveChangelogDirectory(collector, args, ctx); - var changelogDir = await ResolveChangelogDirectory(collector, args, ctx); - if (changelogDir == null) + if (directory == null) return false; - if (!_fileSystem.Directory.Exists(changelogDir)) + if (!_fileSystem.Directory.Exists(directory)) { - _logger.LogInformation("Changelog directory {Directory} does not exist; nothing to upload", changelogDir); + _logger.LogInformation("{ArtifactType} directory {Directory} does not exist; nothing to upload", args.ArtifactType, directory); return true; } - var targets = DiscoverUploadTargets(collector, changelogDir); + var targets = args.ArtifactType == ArtifactType.Bundle + ? DiscoverBundleUploadTargets(collector, directory) + : DiscoverUploadTargets(collector, directory); + if (targets.Count == 0) { - _logger.LogInformation("No changelog files found to upload in {Directory}", changelogDir); + _logger.LogInformation("No {ArtifactType} files found to upload in {Directory}", args.ArtifactType, directory); return true; } - _logger.LogInformation("Found {Count} upload target(s) from {Directory}", targets.Count, changelogDir); + _logger.LogInformation("Found {Count} {ArtifactType} upload target(s) from {Directory}", targets.Count, args.ArtifactType, directory); using var defaultClient = s3Client == null ? new AmazonS3Client() : null; var client = s3Client ?? defaultClient!; @@ -164,6 +164,76 @@ private List ReadProductsFromFragment(string filePath) } } + private static readonly YamlDotNet.Serialization.IDeserializer BundleDeserializer = + ReleaseNotesSerialization.GetEntryDeserializer(); + + internal IReadOnlyList DiscoverBundleUploadTargets(IDiagnosticsCollector collector, string bundleDir) + { + var rootDir = _fileSystem.DirectoryInfo.New(bundleDir); + + var yamlFiles = _fileSystem.Directory.GetFiles(bundleDir, "*.yaml", SearchOption.TopDirectoryOnly) + .Concat(_fileSystem.Directory.GetFiles(bundleDir, "*.yml", SearchOption.TopDirectoryOnly)) + .ToList(); + + var targets = new List(); + + foreach (var filePath in yamlFiles) + { + var fileInfo = _fileSystem.FileInfo.New(filePath); + if (SymlinkValidator.ValidateFileAccess(fileInfo, rootDir) is { } accessError) + { + collector.EmitWarning(filePath, $"Skipping: {accessError}"); + continue; + } + + var products = ReadProductsFromBundle(filePath); + if (products.Count == 0) + { + _logger.LogDebug("No products found in bundle {File}, skipping", filePath); + continue; + } + + var fileName = _fileSystem.Path.GetFileName(filePath); + + foreach (var product in products) + { + if (!ProductNameRegex().IsMatch(product)) + { + collector.EmitWarning(filePath, $"Skipping invalid product name \"{product}\" (must match [a-zA-Z0-9_-]+)"); + continue; + } + + var s3Key = $"{product}/bundles/{fileName}"; + targets.Add(new UploadTarget(filePath, s3Key)); + } + } + + return targets; + } + + private List ReadProductsFromBundle(string filePath) + { + try + { + var content = _fileSystem.File.ReadAllText(filePath); + var bundle = BundleDeserializer.Deserialize(content); + if (bundle?.Products == null) + return []; + + return bundle.Products + .Select(p => p?.Product) + .Where(p => !string.IsNullOrWhiteSpace(p)) + .Select(p => p!) + .Distinct() + .ToList(); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Could not read products from bundle {File}", filePath); + return []; + } + } + private async Task ResolveChangelogDirectory(IDiagnosticsCollector collector, ChangelogUploadArguments args, Cancel ctx) { if (!string.IsNullOrWhiteSpace(args.Directory)) @@ -175,4 +245,16 @@ private List ReadProductsFromFragment(string filePath) var config = await _configLoader.LoadChangelogConfiguration(collector, args.Config, ctx); return config?.Bundle?.Directory ?? "docs/changelog"; } + + private async Task ResolveBundleDirectory(IDiagnosticsCollector collector, ChangelogUploadArguments args, Cancel ctx) + { + if (!string.IsNullOrWhiteSpace(args.Directory)) + return args.Directory; + + if (_configLoader == null) + return "docs/releases"; + + var config = await _configLoader.LoadChangelogConfiguration(collector, args.Config, ctx); + return config?.Bundle?.OutputDirectory ?? "docs/releases"; + } } diff --git a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs index 51bf684d1c..503602efb4 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs @@ -365,4 +365,321 @@ public void FormatIssueLinkAsciidoc_Sentinel_ReturnsEmpty() var s = ChangelogTextUtilities.FormatIssueLinkAsciidoc("# PRIVATE: https://github.com/elastic/x/issues/1", "x", hidePrivateLinks: false); s.Should().BeEmpty(); } + + // --- BuildAllowReposFromAssembler --- + + [Fact] + public void BuildAllowReposFromAssembler_SkipsPrivateAndSkipped() + { + var yaml = + """ + references: + elastic/elasticsearch: + private: false + elastic/kibana: + private: false + elastic/secret-team: + private: true + elastic/old-repo: + skip: true + """; + var asm = AssemblyConfiguration.Deserialize(yaml, skipPrivateRepositories: false); + var allow = LinkAllowlistSanitizer.BuildAllowReposFromAssembler(asm); + + allow.Should().Contain("elastic/elasticsearch"); + allow.Should().Contain("elastic/kibana"); + allow.Should().NotContain("elastic/secret-team"); + allow.Should().NotContain("elastic/old-repo"); + } + + [Fact] + public void BuildAllowReposFromAssembler_DefaultsOwnerToElastic() + { + var yaml = + """ + references: + beats: {} + """; + var asm = AssemblyConfiguration.Deserialize(yaml, skipPrivateRepositories: false); + var allow = LinkAllowlistSanitizer.BuildAllowReposFromAssembler(asm); + + allow.Should().Contain("elastic/beats"); + } + + [Fact] + public void BuildAllowReposFromAssembler_EmptyReferences_ReturnsEmpty() + { + var asm = AssemblyConfiguration.Deserialize("references: {}", skipPrivateRepositories: false); + var allow = LinkAllowlistSanitizer.BuildAllowReposFromAssembler(asm); + + allow.Should().BeEmpty(); + } + + // --- TryApplyChangelogEntry --- + + [Fact] + public void TryApplyChangelogEntry_ScrubsPrsAndIssues() + { + var entry = new BundledEntry + { + Title = "Fix something", + Prs = ["https://github.com/elastic/secret-repo/pull/1"], + Issues = ["https://github.com/elastic/elasticsearch/issues/200"] + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeTrue(); + sanitized.Prs![0].Should().StartWith("# PRIVATE:"); + sanitized.Issues![0].Should().Be("https://github.com/elastic/elasticsearch/issues/200"); + } + + [Fact] + public void TryApplyChangelogEntry_ScrubsDescriptionText() + { + var entry = new BundledEntry + { + Title = "Fix something", + Description = "Fixes elastic/secret-repo#42 which caused issues in https://github.com/elastic/elasticsearch/pull/100" + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeTrue(); + sanitized.Description.Should().NotContain("secret-repo"); + sanitized.Description.Should().Contain("https://github.com/elastic/elasticsearch/pull/100"); + } + + [Fact] + public void TryApplyChangelogEntry_ScrubsImpactAndAction() + { + var entry = new BundledEntry + { + Title = "Breaking change", + Impact = "See elastic/private-infra#10 for details", + Action = "Migrate using https://github.com/elastic/private-infra/pull/11" + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeTrue(); + sanitized.Impact.Should().NotContain("private-infra"); + sanitized.Action.Should().NotContain("private-infra"); + } + + [Fact] + public void TryApplyChangelogEntry_AllAllowed_NoChanges() + { + var entry = new BundledEntry + { + Title = "Good entry", + Prs = ["https://github.com/elastic/elasticsearch/pull/1"], + Issues = ["elastic/kibana#2"], + Description = "Relates to elastic/kibana#2" + }; + + var allow = new[] { "elastic/elasticsearch", "elastic/kibana" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeFalse(); + sanitized.Prs![0].Should().Be("https://github.com/elastic/elasticsearch/pull/1"); + sanitized.Issues![0].Should().Be("elastic/kibana#2"); + sanitized.Description.Should().Be("Relates to elastic/kibana#2"); + } + + [Fact] + public void TryApplyChangelogEntry_NullFields_PreservesNulls() + { + var entry = new BundledEntry + { + Title = "Simple entry", + Prs = null, + Issues = null, + Description = null, + Impact = null, + Action = null + }; + + var allow = new[] { "elastic/elasticsearch" }; + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeFalse(); + sanitized.Prs.Should().BeNull(); + sanitized.Issues.Should().BeNull(); + sanitized.Description.Should().BeNull(); + } + + // --- ScrubText --- + + [Fact] + public void ScrubText_ReplacesPrivateGitHubUrl() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "See https://github.com/elastic/secret-repo/pull/42 for details", + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeTrue(); + result.Should().NotContain("secret-repo"); + result.Should().Contain("for details"); + } + + [Fact] + public void ScrubText_ReplacesPrivateShortForm() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "Related to elastic/private-team#99", + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeTrue(); + result.Should().NotContain("private-team"); + } + + [Fact] + public void ScrubText_PreservesAllowedReferences() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "Fixed in https://github.com/elastic/elasticsearch/pull/100 and elastic/kibana#50", + new[] { "elastic/elasticsearch", "elastic/kibana" }, + ref changed); + + changed.Should().BeFalse(); + result.Should().Contain("https://github.com/elastic/elasticsearch/pull/100"); + result.Should().Contain("elastic/kibana#50"); + } + + [Fact] + public void ScrubText_NullInput_ReturnsNull() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + null, + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeFalse(); + result.Should().BeNull(); + } + + [Fact] + public void ScrubText_EmptyInput_ReturnsEmpty() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "", + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeFalse(); + result.Should().BeEmpty(); + } + + [Fact] + public void ScrubText_NoReferences_ReturnsUnchanged() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "This is plain text with no GitHub references.", + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeFalse(); + result.Should().Be("This is plain text with no GitHub references."); + } + + [Fact] + public void ScrubText_MixedReferences_ScrubsOnlyPrivate() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "Public elastic/elasticsearch#1 and private elastic/secret#2", + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeTrue(); + result.Should().Contain("elastic/elasticsearch#1"); + result.Should().NotContain("elastic/secret#2"); + } + + // --- Idempotency --- + + [Fact] + public void TryApplyChangelogEntry_Idempotent_SecondPassNoChanges() + { + var entry = new BundledEntry + { + Title = "Fix something", + Prs = ["https://github.com/elastic/secret-repo/pull/1"], + Description = "See elastic/secret-repo#42" + }; + + var allow = new[] { "elastic/elasticsearch" }; + + LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, allow, "elastic", "elasticsearch", + out var firstPass, out _); + + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, firstPass, allow, "elastic", "elasticsearch", + out var secondPass, out var secondChanged); + + ok.Should().BeTrue(); + secondChanged.Should().BeFalse(); + secondPass.Prs![0].Should().Be(firstPass.Prs![0]); + secondPass.Description.Should().Be(firstPass.Description); + } + + [Fact] + public void ScrubText_Idempotent_SecondPassUnchanged() + { + var changed1 = false; + var result1 = LinkAllowlistSanitizer.ScrubText( + "See elastic/secret#1 for details", + new[] { "elastic/elasticsearch" }, + ref changed1); + + var changed2 = false; + var result2 = LinkAllowlistSanitizer.ScrubText( + result1, + new[] { "elastic/elasticsearch" }, + ref changed2); + + changed2.Should().BeFalse(); + result2.Should().Be(result1); + } + + [Fact] + public void ScrubText_IssueUrl_ReplacesPrivate() + { + var changed = false; + var result = LinkAllowlistSanitizer.ScrubText( + "Relates to https://github.com/elastic/secret-repo/issues/99", + new[] { "elastic/elasticsearch" }, + ref changed); + + changed.Should().BeTrue(); + result.Should().NotContain("secret-repo"); + } } diff --git a/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs b/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs index 86b53a470c..9af11f8bda 100644 --- a/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs +++ b/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs @@ -332,30 +332,101 @@ public async Task Upload_ElasticsearchTarget_SkipsWithoutS3Calls() } [Fact] - public async Task Upload_BundleArtifactType_SkipsWithoutS3Calls() + public async Task Upload_BundleArtifactType_UploadsToS3() { - AddChangelog("bundle.yaml", """ - title: Ignored bundle - type: feature + var bundleDir = _mockFileSystem.Path.Join(Paths.WorkingDirectoryRoot.FullName, Guid.NewGuid().ToString(), "releases"); + _mockFileSystem.Directory.CreateDirectory(bundleDir); + var path = _mockFileSystem.Path.Join(bundleDir, "elasticsearch-9.2.0.yaml"); + // language=yaml + _mockFileSystem.AddFile(path, new MockFileData(""" products: - product: elasticsearch - prs: - - "900" - """); + target: 9.2.0 + lifecycle: ga + entries: + - title: New feature + type: feature + """)); + + A.CallTo(() => _s3Client.GetObjectMetadataAsync(A._, A._)) + .Throws(new AmazonS3Exception("Not Found") { StatusCode = HttpStatusCode.NotFound }); + + A.CallTo(() => _s3Client.PutObjectAsync(A._, A._)) + .Returns(new PutObjectResponse()); var args = new ChangelogUploadArguments { ArtifactType = ArtifactType.Bundle, Target = UploadTargetKind.S3, S3BucketName = "test-bucket", - Directory = _changelogDir + Directory = bundleDir }; var ct = TestContext.Current.CancellationToken; var result = await _service.Upload(_collector, args, ct); result.Should().BeTrue(); + _collector.Errors.Should().Be(0); - A.CallTo(() => _s3Client.PutObjectAsync(A._, A._)) - .MustNotHaveHappened(); + A.CallTo(() => _s3Client.PutObjectAsync( + A.That.Matches(r => r.Key == "elasticsearch/bundles/elasticsearch-9.2.0.yaml" && r.BucketName == "test-bucket"), + A._ + )).MustHaveHappenedOnceExactly(); + } + + [Fact] + public void DiscoverBundleUploadTargets_MapsToCorrectS3Key() + { + var bundleDir = _mockFileSystem.Path.Join(Paths.WorkingDirectoryRoot.FullName, Guid.NewGuid().ToString(), "releases"); + _mockFileSystem.Directory.CreateDirectory(bundleDir); + // language=yaml + _mockFileSystem.AddFile(_mockFileSystem.Path.Join(bundleDir, "elasticsearch-9.2.0.yaml"), new MockFileData(""" + products: + - product: elasticsearch + target: 9.2.0 + entries: + - title: Feature + type: feature + """)); + + var targets = _service.DiscoverBundleUploadTargets(_collector, bundleDir); + + targets.Should().HaveCount(1); + targets[0].S3Key.Should().Be("elasticsearch/bundles/elasticsearch-9.2.0.yaml"); + _collector.Errors.Should().Be(0); + } + + [Fact] + public void DiscoverBundleUploadTargets_MultipleProducts_CreatesTargetPerProduct() + { + var bundleDir = _mockFileSystem.Path.Join(Paths.WorkingDirectoryRoot.FullName, Guid.NewGuid().ToString(), "releases"); + _mockFileSystem.Directory.CreateDirectory(bundleDir); + // language=yaml + _mockFileSystem.AddFile(_mockFileSystem.Path.Join(bundleDir, "stack-9.2.0.yaml"), new MockFileData(""" + products: + - product: elasticsearch + target: 9.2.0 + - product: kibana + target: 9.2.0 + entries: + - title: Stack feature + type: feature + """)); + + var targets = _service.DiscoverBundleUploadTargets(_collector, bundleDir); + + targets.Should().HaveCount(2); + targets.Should().Contain(t => t.S3Key == "elasticsearch/bundles/stack-9.2.0.yaml"); + targets.Should().Contain(t => t.S3Key == "kibana/bundles/stack-9.2.0.yaml"); + } + + [Fact] + public void DiscoverBundleUploadTargets_EmptyDirectory_ReturnsEmpty() + { + var bundleDir = _mockFileSystem.Path.Join(Paths.WorkingDirectoryRoot.FullName, Guid.NewGuid().ToString(), "releases"); + _mockFileSystem.Directory.CreateDirectory(bundleDir); + + var targets = _service.DiscoverBundleUploadTargets(_collector, bundleDir); + + targets.Should().BeEmpty(); } } From 031b7acbb3c737907d482dfba1a202417c9777f5 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Mon, 20 Apr 2026 16:22:36 -0300 Subject: [PATCH 2/8] FIx tests --- .../Changelogs/LinkAllowlistSanitizerTests.cs | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs index 503602efb4..2d48dfb84c 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs @@ -11,6 +11,9 @@ namespace Elastic.Changelog.Tests.Changelogs; public class LinkAllowlistSanitizerTests(ITestOutputHelper output) : ChangelogTestBase(output) { + private static readonly string[] AllowElasticsearch = ["elastic/elasticsearch"]; + private static readonly string[] AllowElasticsearchAndKibana = ["elastic/elasticsearch", "elastic/kibana"]; + [Fact] public void TryGetGitHubRepo_FullUrl_ParsesOwnerRepo() { @@ -535,7 +538,7 @@ public void ScrubText_ReplacesPrivateGitHubUrl() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "See https://github.com/elastic/secret-repo/pull/42 for details", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeTrue(); @@ -549,7 +552,7 @@ public void ScrubText_ReplacesPrivateShortForm() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "Related to elastic/private-team#99", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeTrue(); @@ -562,7 +565,7 @@ public void ScrubText_PreservesAllowedReferences() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "Fixed in https://github.com/elastic/elasticsearch/pull/100 and elastic/kibana#50", - new[] { "elastic/elasticsearch", "elastic/kibana" }, + AllowElasticsearchAndKibana, ref changed); changed.Should().BeFalse(); @@ -576,7 +579,7 @@ public void ScrubText_NullInput_ReturnsNull() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( null, - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeFalse(); @@ -589,7 +592,7 @@ public void ScrubText_EmptyInput_ReturnsEmpty() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeFalse(); @@ -602,7 +605,7 @@ public void ScrubText_NoReferences_ReturnsUnchanged() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "This is plain text with no GitHub references.", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeFalse(); @@ -615,7 +618,7 @@ public void ScrubText_MixedReferences_ScrubsOnlyPrivate() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "Public elastic/elasticsearch#1 and private elastic/secret#2", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeTrue(); @@ -657,13 +660,13 @@ public void ScrubText_Idempotent_SecondPassUnchanged() var changed1 = false; var result1 = LinkAllowlistSanitizer.ScrubText( "See elastic/secret#1 for details", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed1); var changed2 = false; var result2 = LinkAllowlistSanitizer.ScrubText( result1, - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed2); changed2.Should().BeFalse(); @@ -676,7 +679,7 @@ public void ScrubText_IssueUrl_ReplacesPrivate() var changed = false; var result = LinkAllowlistSanitizer.ScrubText( "Relates to https://github.com/elastic/secret-repo/issues/99", - new[] { "elastic/elasticsearch" }, + AllowElasticsearch, ref changed); changed.Should().BeTrue(); From aacd6e3f2b71a18bbe75456d8c6a408f1dd480fc Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Mon, 20 Apr 2026 16:51:20 -0300 Subject: [PATCH 3/8] Add changelog scrubber lambda --- .../build-changelog-scrubber-lambda.yml | 46 ++++ .github/workflows/release.yml | 69 ++++- .../docs-lambda-changelog-scrubber/Program.cs | 235 ++++++++++++++++++ .../docs-lambda-changelog-scrubber/README.md | 42 ++++ .../SerializerContext.cs | 14 ++ .../aws-lambda-tools-defaults.json | 15 ++ .../docs-lambda-changelog-scrubber.csproj | 38 +++ .../lambda.DockerFile | 34 +++ 8 files changed, 485 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/build-changelog-scrubber-lambda.yml create mode 100644 src/infra/docs-lambda-changelog-scrubber/Program.cs create mode 100644 src/infra/docs-lambda-changelog-scrubber/README.md create mode 100644 src/infra/docs-lambda-changelog-scrubber/SerializerContext.cs create mode 100644 src/infra/docs-lambda-changelog-scrubber/aws-lambda-tools-defaults.json create mode 100644 src/infra/docs-lambda-changelog-scrubber/docs-lambda-changelog-scrubber.csproj create mode 100644 src/infra/docs-lambda-changelog-scrubber/lambda.DockerFile diff --git a/.github/workflows/build-changelog-scrubber-lambda.yml b/.github/workflows/build-changelog-scrubber-lambda.yml new file mode 100644 index 0000000000..6265d75ce4 --- /dev/null +++ b/.github/workflows/build-changelog-scrubber-lambda.yml @@ -0,0 +1,46 @@ +--- +# This workflow builds the changelog-scrubber Lambda function bootstrap binary +# that can be deployed to AWS Lambda. +name: Build Changelog Scrubber Lambda + +on: + workflow_dispatch: + workflow_call: + inputs: + ref: + required: false + type: string + default: ${{ github.ref }} + +permissions: {} + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + env: + BINARY_PATH: .artifacts/docs-lambda-changelog-scrubber/release_linux-x64/bootstrap + steps: + - uses: actions/checkout@v6 + with: + ref: ${{ inputs.ref }} + persist-credentials: false + - name: Amazon Linux 2023 build + run: | + docker build . -t changelog-scrubber:latest -f src/infra/docs-lambda-changelog-scrubber/lambda.DockerFile + - name: Get bootstrap binary + run: | + docker cp "$(docker create --name tc changelog-scrubber:latest)":/app/.artifacts/publish ./.artifacts && docker rm tc + - name: Inspect bootstrap binary + run: | + tree .artifacts + stat "${BINARY_PATH}" + - name: Archive artifact + id: upload-artifact + uses: actions/upload-artifact@v7 + with: + name: changelog-scrubber-lambda-binary + retention-days: 1 + if-no-files-found: error + path: ${{ env.BINARY_PATH }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cc86a8d8a4..4982e8b987 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -69,19 +69,26 @@ jobs: - name: Publish Containers run: ./build.sh publishcontainers - build-lambda: + build-link-index-lambda: needs: - release-drafter uses: ./.github/workflows/build-link-index-updater-lambda.yml with: ref: ${{ needs.release-drafter.outputs.tag_name }} - + + build-changelog-scrubber-lambda: + needs: + - release-drafter + uses: ./.github/workflows/build-changelog-scrubber-lambda.yml + with: + ref: ${{ needs.release-drafter.outputs.tag_name }} + deploy-link-index-updater-lambda-prod: environment: name: link-index-updater-prod runs-on: ubuntu-latest needs: - - build-lambda + - build-link-index-lambda - release-drafter permissions: contents: write @@ -93,8 +100,8 @@ jobs: - name: Download bootstrap binary uses: actions/download-artifact@v8 with: - name: link-index-updater-lambda-binary # Defined in build-link-index-updater-lambda.yml - + name: link-index-updater-lambda-binary + - name: Create zip run: | zip -j "${ZIP_FILE}" ./bootstrap @@ -113,7 +120,50 @@ jobs: - name: Attach Distribution to release env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: gh release upload --repo ${{ github.repository }} ${{ needs.release-drafter.outputs.tag_name }} "${ZIP_FILE}" + TAG_NAME: ${{ needs.release-drafter.outputs.tag_name }} + REPO: ${{ github.repository }} + run: gh release upload --repo "$REPO" "$TAG_NAME" "${ZIP_FILE}" + + deploy-changelog-scrubber-lambda-prod: + environment: + name: changelog-scrubber-prod + runs-on: ubuntu-latest + needs: + - build-changelog-scrubber-lambda + - release-drafter + permissions: + contents: write + id-token: write + env: + ZIP_FILE: changelog-scrubber-lambda.zip + steps: + + - name: Download bootstrap binary + uses: actions/download-artifact@v8 + with: + name: changelog-scrubber-lambda-binary + + - name: Create zip + run: | + zip -j "${ZIP_FILE}" ./bootstrap + + - uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6.1.0 + with: + role-to-assume: arn:aws:iam::197730964718:role/elastic-docs-v3-changelog-scrubber-deployer + aws-region: us-east-1 + + - name: Upload Lambda function + run: | + aws lambda update-function-code \ + --function-name elastic-docs-v3-changelog-scrubber \ + --zip-file "fileb://${ZIP_FILE}" + + - name: Attach Distribution to release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG_NAME: ${{ needs.release-drafter.outputs.tag_name }} + REPO: ${{ github.repository }} + run: gh release upload --repo "$REPO" "$TAG_NAME" "${ZIP_FILE}" release: needs: @@ -159,8 +209,9 @@ jobs: - name: Attach Distribution to release env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG_NAME: ${{ needs.release-drafter.outputs.tag_name }} run: | - gh release upload ${{ needs.release-drafter.outputs.tag_name }} .artifacts/publish/docs-builder/release/*.zip + gh release upload "$TAG_NAME" .artifacts/publish/docs-builder/release/*.zip shell: bash publish-release: @@ -169,6 +220,7 @@ jobs: - containers - release-drafter - deploy-link-index-updater-lambda-prod + - deploy-changelog-scrubber-lambda-prod runs-on: ubuntu-latest permissions: contents: write @@ -177,5 +229,6 @@ jobs: env: GH_TOKEN: ${{ github.token }} TAG_NAME: ${{ needs.release-drafter.outputs.tag_name }} + REPO: ${{ github.repository }} run: | - gh release edit ${{ needs.release-drafter.outputs.tag_name }} --draft=false --latest --repo ${{ github.repository }} + gh release edit "$TAG_NAME" --draft=false --latest --repo "$REPO" diff --git a/src/infra/docs-lambda-changelog-scrubber/Program.cs b/src/infra/docs-lambda-changelog-scrubber/Program.cs new file mode 100644 index 0000000000..fed8748653 --- /dev/null +++ b/src/infra/docs-lambda-changelog-scrubber/Program.cs @@ -0,0 +1,235 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Net; +using System.Reflection; +using System.Text.Json; +using Amazon.Lambda.Core; +using Amazon.Lambda.RuntimeSupport; +using Amazon.Lambda.Serialization.SystemTextJson; +using Amazon.Lambda.SQSEvents; +using Amazon.S3; +using Amazon.S3.Model; +using Amazon.S3.Util; +using Elastic.Changelog.Bundling; +using Elastic.Documentation.Configuration.Assembler; +using Elastic.Documentation.Configuration.ReleaseNotes; +using Elastic.Documentation.Diagnostics; +using Elastic.Documentation.Lambda.ChangelogScrubber; +using Elastic.Documentation.ReleaseNotes; + +var publicBucketName = Environment.GetEnvironmentVariable("PUBLIC_BUCKET_NAME") + ?? throw new InvalidOperationException("PUBLIC_BUCKET_NAME environment variable is required"); + +var allowRepos = BuildAllowlist(); + +await LambdaBootstrapBuilder + .Create(Handler, new SourceGeneratorLambdaJsonSerializer()) + .Build() + .RunAsync(); + +return; + +IReadOnlyList BuildAllowlist() +{ + using var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream("assembler.yml") + ?? throw new InvalidOperationException("Embedded assembler.yml not found"); + using var reader = new StreamReader(stream); + var yaml = reader.ReadToEnd(); + var assembly = AssemblyConfiguration.Deserialize(yaml, skipPrivateRepositories: false); + return LinkAllowlistSanitizer.BuildAllowReposFromAssembler(assembly); +} + +async Task Handler(SQSEvent ev, ILambdaContext context) +{ + using var s3Client = new AmazonS3Client(); + var batchItemFailures = new List(); + + foreach (var message in ev.Records) + { + try + { + var s3Event = S3EventNotification.ParseJson(message.Body); + foreach (var record in s3Event.Records) + { + var key = Uri.UnescapeDataString(record.S3.Object.Key.Replace('+', ' ')); + var sourceBucket = record.S3.Bucket.Name; + var eventName = record.EventName; + + context.Logger.LogInformation("Processing event={EventName} key={Key}", eventName, key); + + if (eventName.Value.Contains("ObjectRemoved")) + { + await DeleteFromPublicBucket(s3Client, key, context); + } + else if (eventName.Value.Contains("ObjectCreated")) + { + await ScrubAndCopyToPublicBucket(s3Client, sourceBucket, key, context); + } + else + { + context.Logger.LogWarning("Ignoring unhandled event type: {EventName}", eventName); + } + } + } + catch (Exception e) + { + context.Logger.LogWarning(e, "Failed to process message {MessageId}", message.MessageId); + batchItemFailures.Add(new SQSBatchResponse.BatchItemFailure { ItemIdentifier = message.MessageId }); + } + } + + var response = new SQSBatchResponse(batchItemFailures); + if (batchItemFailures.Count > 0) + context.Logger.LogInformation("Failed {FailedCount} of {TotalCount} messages", batchItemFailures.Count, ev.Records.Count); + + var jsonStr = JsonSerializer.Serialize(response, SerializerContext.Default.SQSBatchResponse); + context.Logger.LogInformation(jsonStr); + return response; +} + +async Task DeleteFromPublicBucket(IAmazonS3 s3Client, string key, ILambdaContext context) +{ + try + { + _ = await s3Client.DeleteObjectAsync(new DeleteObjectRequest + { + BucketName = publicBucketName, + Key = key + }); + context.Logger.LogInformation("Deleted {Key} from public bucket", key); + } + catch (AmazonS3Exception e) when (e.StatusCode == HttpStatusCode.NotFound) + { + context.Logger.LogInformation("Key {Key} already absent from public bucket", key); + } +} + +async Task ScrubAndCopyToPublicBucket(IAmazonS3 s3Client, string sourceBucket, string key, ILambdaContext context) +{ + if (key.EndsWith(".json", StringComparison.OrdinalIgnoreCase)) + { + await CopyPassThrough(s3Client, sourceBucket, key, context); + return; + } + + if (!key.EndsWith(".yaml", StringComparison.OrdinalIgnoreCase) && + !key.EndsWith(".yml", StringComparison.OrdinalIgnoreCase)) + { + context.Logger.LogInformation("Skipping non-YAML key: {Key}", key); + return; + } + + var getResponse = await s3Client.GetObjectAsync(new GetObjectRequest + { + BucketName = sourceBucket, + Key = key + }); + + string content; + using (var reader = new StreamReader(getResponse.ResponseStream)) + content = await reader.ReadToEndAsync(); + + var scrubbed = await ScrubContent(key, content, context); + + _ = await s3Client.PutObjectAsync(new PutObjectRequest + { + BucketName = publicBucketName, + Key = key, + ContentBody = scrubbed, + ContentType = "application/yaml" + }); + + context.Logger.LogInformation("Scrubbed and wrote {Key} to public bucket", key); +} + +async Task CopyPassThrough(IAmazonS3 s3Client, string sourceBucket, string key, ILambdaContext context) +{ + _ = await s3Client.CopyObjectAsync(new CopyObjectRequest + { + SourceBucket = sourceBucket, + SourceKey = key, + DestinationBucket = publicBucketName, + DestinationKey = key + }); + context.Logger.LogInformation("Copied {Key} to public bucket (pass-through)", key); +} + +async Task ScrubContent(string key, string content, ILambdaContext context) +{ + var isBundlePath = key.Contains("/bundles/", StringComparison.OrdinalIgnoreCase); + + if (isBundlePath) + return await ScrubBundle(content, context); + + return await ScrubChangelog(content, context); +} + +async Task ScrubBundle(string content, ILambdaContext context) +{ + var bundle = ReleaseNotesSerialization.DeserializeBundle(content); + var owner = bundle.Products.Count > 0 ? bundle.Products[0].Owner ?? "elastic" : "elastic"; + var repo = bundle.Products.Count > 0 ? bundle.Products[0].Repo : null; + + await using var collector = new DiagnosticsCollector([]); + if (!LinkAllowlistSanitizer.TryApplyBundle(collector, bundle, allowRepos, owner, repo, out var sanitized, out var changed)) + { + context.Logger.LogWarning("Failed to apply allowlist to bundle, writing original content"); + return content; + } + + if (!changed) + { + context.Logger.LogInformation("Bundle had no private references, writing unchanged"); + return content; + } + + return ReleaseNotesSerialization.SerializeBundle(sanitized); +} + +async Task ScrubChangelog(string content, ILambdaContext context) +{ + var normalized = ReleaseNotesSerialization.NormalizeYaml(content); + var entry = ReleaseNotesSerialization.DeserializeEntry(normalized); + + var bundledEntry = new BundledEntry + { + Type = entry.Type, + Title = entry.Title, + Description = entry.Description, + Impact = entry.Impact, + Action = entry.Action, + Prs = entry.Prs, + Issues = entry.Issues, + Areas = entry.Areas, + Highlight = entry.Highlight, + Subtype = entry.Subtype + }; + + await using var collector = new DiagnosticsCollector([]); + if (!LinkAllowlistSanitizer.TryApplyChangelogEntry( + collector, bundledEntry, allowRepos, "elastic", null, + out var sanitized, out var changed)) + { + context.Logger.LogWarning("Failed to apply allowlist to changelog entry, writing original content"); + return content; + } + + if (!changed) + { + context.Logger.LogInformation("Changelog entry had no private references, writing unchanged"); + return content; + } + + var scrubEntry = entry with + { + Description = sanitized.Description, + Impact = sanitized.Impact, + Action = sanitized.Action, + Prs = sanitized.Prs, + Issues = sanitized.Issues + }; + + return ReleaseNotesSerialization.SerializeEntry(scrubEntry); +} diff --git a/src/infra/docs-lambda-changelog-scrubber/README.md b/src/infra/docs-lambda-changelog-scrubber/README.md new file mode 100644 index 0000000000..9e22be860d --- /dev/null +++ b/src/infra/docs-lambda-changelog-scrubber/README.md @@ -0,0 +1,42 @@ +# Changelog Scrubber Lambda Function + +SQS-triggered Lambda that reads changelog/bundle YAML from the private S3 bucket, +scrubs private repository references using `LinkAllowlistSanitizer`, and writes +sanitized copies to the public S3 bucket. + +The public repo allowlist is derived from `config/assembler.yml` (baked into the +Lambda image as an embedded resource at build time). Changes to `assembler.yml` +trigger a Lambda redeploy via CI. + +## Build + +From a linux `x86_64` machine (or Docker): + +```bash +docker build . -t changelog-scrubber:latest -f src/infra/docs-lambda-changelog-scrubber/lambda.DockerFile +``` + +Copy the published artifacts from the image: + +```bash +docker cp $(docker create --name tc changelog-scrubber:latest):/app/.artifacts/publish ./.artifacts && docker rm tc +``` + +The `bootstrap` binary should be available under: + +``` +.artifacts/publish/docs-lambda-changelog-scrubber/release_linux-x64/bootstrap +``` + +## Event handling + +- **`s3:ObjectCreated:*`** on `.yaml`/`.yml` files: read from private bucket, scrub private references, write to public bucket +- **`s3:ObjectCreated:*`** on `.json` files: copy as-is (pass-through for `registry-index.json`) +- **`s3:ObjectRemoved:*`**: delete the same key from the public bucket +- Other keys are silently skipped + +## Scrubbing logic + +- **Bundle files** (`{product}/bundles/*.yaml`): `LinkAllowlistSanitizer.TryApplyBundle` scrubs `prs`/`issues` lists +- **Changelog entries** (`{product}/changelogs/*.yaml`): `LinkAllowlistSanitizer.TryApplyChangelogEntry` scrubs `prs`, `issues`, `description`, `impact`, `action` +- The allowlist is built once at cold start from the embedded `assembler.yml` via `BuildAllowReposFromAssembler` diff --git a/src/infra/docs-lambda-changelog-scrubber/SerializerContext.cs b/src/infra/docs-lambda-changelog-scrubber/SerializerContext.cs new file mode 100644 index 0000000000..288ea28d9d --- /dev/null +++ b/src/infra/docs-lambda-changelog-scrubber/SerializerContext.cs @@ -0,0 +1,14 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Text.Json.Serialization; +using Amazon.Lambda.SQSEvents; +using Amazon.S3.Util; + +namespace Elastic.Documentation.Lambda.ChangelogScrubber; + +[JsonSerializable(typeof(SQSEvent))] +[JsonSerializable(typeof(S3EventNotification))] +[JsonSerializable(typeof(SQSBatchResponse))] +public partial class SerializerContext : JsonSerializerContext; diff --git a/src/infra/docs-lambda-changelog-scrubber/aws-lambda-tools-defaults.json b/src/infra/docs-lambda-changelog-scrubber/aws-lambda-tools-defaults.json new file mode 100644 index 0000000000..90ac25e959 --- /dev/null +++ b/src/infra/docs-lambda-changelog-scrubber/aws-lambda-tools-defaults.json @@ -0,0 +1,15 @@ +{ + "Information": [ + "This file provides default values for the deployment wizard inside Visual Studio and the AWS Lambda commands added to the .NET Core CLI.", + "To learn more about the Lambda commands with the .NET Core CLI execute the following command at the command line in the project root directory.", + "dotnet lambda help", + "All the command line options for the Lambda command can be specified in this file." + ], + "profile": "default", + "region": "us-east-1", + "configuration": "Release", + "function-runtime": "provided.al2023", + "function-memory-size": 256, + "function-timeout": 30, + "function-handler": "Elastic.Documentation.Lambda.ChangelogScrubber" +} diff --git a/src/infra/docs-lambda-changelog-scrubber/docs-lambda-changelog-scrubber.csproj b/src/infra/docs-lambda-changelog-scrubber/docs-lambda-changelog-scrubber.csproj new file mode 100644 index 0000000000..da29f2ee87 --- /dev/null +++ b/src/infra/docs-lambda-changelog-scrubber/docs-lambda-changelog-scrubber.csproj @@ -0,0 +1,38 @@ + + + Exe + net10.0 + enable + enable + true + + bootstrap + Lambda + + true + true + true + true + false + Linux + + Elastic.Documentation.Lambda.ChangelogScrubber + + + + + + + + + + + + + + + + + + + diff --git a/src/infra/docs-lambda-changelog-scrubber/lambda.DockerFile b/src/infra/docs-lambda-changelog-scrubber/lambda.DockerFile new file mode 100644 index 0000000000..178a6f54a5 --- /dev/null +++ b/src/infra/docs-lambda-changelog-scrubber/lambda.DockerFile @@ -0,0 +1,34 @@ +FROM public.ecr.aws/amazonlinux/amazonlinux:2023 AS base + +ARG TARGETARCH +ARG TARGETOS + +WORKDIR /app + +RUN rpm --import https://packages.microsoft.com/keys/microsoft.asc + +RUN dnf update -y +RUN dnf install -y npm +RUN dnf install -y git +RUN dnf install -y clang + +## temporary see: https://github.com/amazonlinux/amazon-linux-2023/issues/1025 +RUN dnf install -y tar +RUN dnf install -y findutils +RUN curl -L https://dot.net/v1/dotnet-install.sh -o dotnet-install.sh +RUN chmod +x ./dotnet-install.sh +RUN ./dotnet-install.sh +ENV PATH="$PATH:/root/.dotnet" +ENV DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 +# /end temporary + +COPY . . + +ENV DOTNET_NOLOGO=true \ + DOTNET_CLI_TELEMETRY_OPTOUT=true + +RUN arch=$TARGETARCH \ + && if [ "$arch" = "amd64" ]; then arch="x64"; fi \ + && echo $TARGETOS-$arch > /tmp/rid + +RUN dotnet publish src/infra/docs-lambda-changelog-scrubber -r linux-x64 -c Release From 70af61843179ab7863c6da288eec831bd3c12553 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Tue, 21 Apr 2026 12:51:11 -0300 Subject: [PATCH 4/8] Drop sentinels entries and tighten JSON pass-through --- .../docs-lambda-changelog-scrubber/Program.cs | 19 ++++++++++--------- .../Bundling/LinkAllowlistSanitizer.cs | 17 +++++++++++++++++ .../Changelogs/LinkAllowlistSanitizerTests.cs | 4 ++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/infra/docs-lambda-changelog-scrubber/Program.cs b/src/infra/docs-lambda-changelog-scrubber/Program.cs index fed8748653..71c8b58e9c 100644 --- a/src/infra/docs-lambda-changelog-scrubber/Program.cs +++ b/src/infra/docs-lambda-changelog-scrubber/Program.cs @@ -108,12 +108,19 @@ async Task DeleteFromPublicBucket(IAmazonS3 s3Client, string key, ILambdaContext async Task ScrubAndCopyToPublicBucket(IAmazonS3 s3Client, string sourceBucket, string key, ILambdaContext context) { - if (key.EndsWith(".json", StringComparison.OrdinalIgnoreCase)) + var fileName = Path.GetFileName(key); + if (string.Equals(fileName, "registry-index.json", StringComparison.OrdinalIgnoreCase)) { await CopyPassThrough(s3Client, sourceBucket, key, context); return; } + if (key.EndsWith(".json", StringComparison.OrdinalIgnoreCase)) + { + context.Logger.LogWarning("Skipping unapproved JSON key: {Key}", key); + return; + } + if (!key.EndsWith(".yaml", StringComparison.OrdinalIgnoreCase) && !key.EndsWith(".yml", StringComparison.OrdinalIgnoreCase)) { @@ -174,10 +181,7 @@ async Task ScrubBundle(string content, ILambdaContext context) await using var collector = new DiagnosticsCollector([]); if (!LinkAllowlistSanitizer.TryApplyBundle(collector, bundle, allowRepos, owner, repo, out var sanitized, out var changed)) - { - context.Logger.LogWarning("Failed to apply allowlist to bundle, writing original content"); - return content; - } + throw new InvalidOperationException($"Failed to apply allowlist to bundle; errors: {collector.Errors}"); if (!changed) { @@ -211,10 +215,7 @@ async Task ScrubChangelog(string content, ILambdaContext context) if (!LinkAllowlistSanitizer.TryApplyChangelogEntry( collector, bundledEntry, allowRepos, "elastic", null, out var sanitized, out var changed)) - { - context.Logger.LogWarning("Failed to apply allowlist to changelog entry, writing original content"); - return content; - } + throw new InvalidOperationException($"Failed to apply allowlist to changelog entry; errors: {collector.Errors}"); if (!changed) { diff --git a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs index 8bdb3aa4d2..3a509586be 100644 --- a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs +++ b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs @@ -166,6 +166,7 @@ public static bool TryApplyChangelogEntry( ref anyRewritten); if (prs == null && entry.Prs is not null) return false; + prs = DropSentinels(prs, ref anyRewritten); var issues = ApplyToReferenceList( collector, @@ -177,6 +178,7 @@ public static bool TryApplyChangelogEntry( ref anyRewritten); if (issues == null && entry.Issues is not null) return false; + issues = DropSentinels(issues, ref anyRewritten); var description = ScrubText(entry.Description, allow, ref anyRewritten); var impact = ScrubText(entry.Impact, allow, ref anyRewritten); @@ -244,6 +246,21 @@ public static bool TryApplyChangelogEntry( return ScrubText(input, allow, ref changed); } + private static IReadOnlyList? DropSentinels(IReadOnlyList? refs, ref bool changed) + { + if (refs is null) + return null; + + var filtered = refs + .Where(r => !r.StartsWith(SentinelPrefix, StringComparison.OrdinalIgnoreCase)) + .ToList(); + + if (filtered.Count != refs.Count) + changed = true; + + return filtered; + } + private static HashSet BuildAllowSet(IReadOnlyList allowRepos) { var allow = new HashSet(StringComparer.OrdinalIgnoreCase); diff --git a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs index 2d48dfb84c..f65f430dd8 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs @@ -437,7 +437,7 @@ public void TryApplyChangelogEntry_ScrubsPrsAndIssues() ok.Should().BeTrue(); changed.Should().BeTrue(); - sanitized.Prs![0].Should().StartWith("# PRIVATE:"); + sanitized.Prs.Should().BeEmpty(); sanitized.Issues![0].Should().Be("https://github.com/elastic/elasticsearch/issues/200"); } @@ -650,7 +650,7 @@ public void TryApplyChangelogEntry_Idempotent_SecondPassNoChanges() ok.Should().BeTrue(); secondChanged.Should().BeFalse(); - secondPass.Prs![0].Should().Be(firstPass.Prs![0]); + secondPass.Prs.Should().BeEmpty(); secondPass.Description.Should().Be(firstPass.Description); } From efd4941529846bb3b086d077878367a0ade07998 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Wed, 22 Apr 2026 11:30:22 -0300 Subject: [PATCH 5/8] Enforce stripping strategy and add tests --- .../docs-lambda-changelog-scrubber/Program.cs | 11 +- .../Bundling/LinkAllowlistSanitizer.cs | 54 +++++ .../Changelogs/LinkAllowlistSanitizerTests.cs | 214 ++++++++++++++++++ 3 files changed, 277 insertions(+), 2 deletions(-) diff --git a/src/infra/docs-lambda-changelog-scrubber/Program.cs b/src/infra/docs-lambda-changelog-scrubber/Program.cs index 71c8b58e9c..ea9027db00 100644 --- a/src/infra/docs-lambda-changelog-scrubber/Program.cs +++ b/src/infra/docs-lambda-changelog-scrubber/Program.cs @@ -186,10 +186,14 @@ async Task ScrubBundle(string content, ILambdaContext context) if (!changed) { context.Logger.LogInformation("Bundle had no private references, writing unchanged"); + LinkAllowlistSanitizer.ValidateNoPrivateReferences(content, allowRepos); return content; } - return ReleaseNotesSerialization.SerializeBundle(sanitized); + sanitized = LinkAllowlistSanitizer.StripBundleSentinels(sanitized); + var result = ReleaseNotesSerialization.SerializeBundle(sanitized); + LinkAllowlistSanitizer.ValidateNoPrivateReferences(result, allowRepos); + return result; } async Task ScrubChangelog(string content, ILambdaContext context) @@ -220,6 +224,7 @@ async Task ScrubChangelog(string content, ILambdaContext context) if (!changed) { context.Logger.LogInformation("Changelog entry had no private references, writing unchanged"); + LinkAllowlistSanitizer.ValidateNoPrivateReferences(content, allowRepos); return content; } @@ -232,5 +237,7 @@ async Task ScrubChangelog(string content, ILambdaContext context) Issues = sanitized.Issues }; - return ReleaseNotesSerialization.SerializeEntry(scrubEntry); + var result = ReleaseNotesSerialization.SerializeEntry(scrubEntry); + LinkAllowlistSanitizer.ValidateNoPrivateReferences(result, allowRepos); + return result; } diff --git a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs index 3a509586be..33bb580dc4 100644 --- a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs +++ b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs @@ -237,6 +237,60 @@ public static bool TryApplyChangelogEntry( return result; } + /// + /// Strips # PRIVATE: sentinel entries from all entries in a bundle. + /// Use after when the output is destined for a public bucket. + /// + public static Bundle StripBundleSentinels(Bundle bundle) + { + var newEntries = new List(bundle.Entries.Count); + foreach (var entry in bundle.Entries) + { + var dummy = false; + var prs = DropSentinels(entry.Prs, ref dummy); + var issues = DropSentinels(entry.Issues, ref dummy); + newEntries.Add(entry with { Prs = prs, Issues = issues }); + } + + return bundle with { Entries = newEntries }; + } + + /// + /// Scans serialized YAML for GitHub references not in . + /// Throws if any are found, providing defense-in-depth + /// against new or renamed fields leaking private references. + /// + public static void ValidateNoPrivateReferences(string serializedYaml, IReadOnlyList allowRepos) + { + var allow = BuildAllowSet(allowRepos); + var violations = new List(); + + foreach (var match in GitHubUrlRegex().EnumerateMatches(serializedYaml)) + { + var text = serializedYaml.Substring(match.Index, match.Length); + var m = GitHubUrlRegex().Match(text); + var fullName = $"{m.Groups["owner"].Value}/{m.Groups["repo"].Value}"; + if (!allow.Contains(fullName)) + violations.Add(text); + } + + foreach (var match in ShortFormRefRegex().EnumerateMatches(serializedYaml)) + { + var text = serializedYaml.Substring(match.Index, match.Length); + var m = ShortFormRefRegex().Match(text); + var fullName = $"{m.Groups["owner"].Value}/{m.Groups["repo"].Value}"; + if (!allow.Contains(fullName)) + violations.Add(text); + } + + if (serializedYaml.Contains(SentinelPrefix, StringComparison.OrdinalIgnoreCase)) + violations.Add("Residual # PRIVATE: sentinel found"); + + if (violations.Count > 0) + throw new InvalidOperationException( + $"Post-serialize validation failed: {violations.Count} private reference(s) found in public output: {string.Join(", ", violations)}"); + } + /// /// Overload that accepts a list of allowed repos (converts to the internal ). /// diff --git a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs index f65f430dd8..0e7eac94f6 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs @@ -685,4 +685,218 @@ public void ScrubText_IssueUrl_ReplacesPrivate() changed.Should().BeTrue(); result.Should().NotContain("secret-repo"); } + + // --- StripBundleSentinels --- + + [Fact] + public void StripBundleSentinels_RemovesSentinelsFromPrsAndIssues() + { + var bundle = new Bundle + { + Entries = + [ + new() + { + Title = "Entry with sentinels", + Prs = ["https://github.com/elastic/elasticsearch/pull/1", "# PRIVATE: https://github.com/elastic/secret/pull/2"], + Issues = ["# PRIVATE: elastic/secret#3", "https://github.com/elastic/elasticsearch/issues/4"] + } + ] + }; + + var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + + result.Entries[0].Prs.Should().HaveCount(1); + result.Entries[0].Prs![0].Should().Be("https://github.com/elastic/elasticsearch/pull/1"); + result.Entries[0].Issues.Should().HaveCount(1); + result.Entries[0].Issues![0].Should().Be("https://github.com/elastic/elasticsearch/issues/4"); + } + + [Fact] + public void StripBundleSentinels_AllSentinels_ReturnsEmptyLists() + { + var bundle = new Bundle + { + Entries = + [ + new() + { + Title = "All private", + Prs = ["# PRIVATE: https://github.com/elastic/secret/pull/1"], + Issues = ["# PRIVATE: elastic/secret#2"] + } + ] + }; + + var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + + result.Entries[0].Prs.Should().BeEmpty(); + result.Entries[0].Issues.Should().BeEmpty(); + } + + [Fact] + public void StripBundleSentinels_NullLists_PreservesNull() + { + var bundle = new Bundle + { + Entries = [new() { Title = "No refs", Prs = null, Issues = null }] + }; + + var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + + result.Entries[0].Prs.Should().BeNull(); + result.Entries[0].Issues.Should().BeNull(); + } + + [Fact] + public void StripBundleSentinels_MultipleEntries_StripsAll() + { + var bundle = new Bundle + { + Entries = + [ + new() { Title = "A", Prs = ["# PRIVATE: x"] }, + new() { Title = "B", Issues = ["# PRIVATE: y", "elastic/elasticsearch#1"] } + ] + }; + + var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + + result.Entries[0].Prs.Should().BeEmpty(); + result.Entries[1].Issues.Should().HaveCount(1); + result.Entries[1].Issues![0].Should().Be("elastic/elasticsearch#1"); + } + + // --- ValidateNoPrivateReferences --- + + [Fact] + public void ValidateNoPrivateReferences_CleanYaml_DoesNotThrow() + { + var yaml = """ + title: Feature + prs: + - https://github.com/elastic/elasticsearch/pull/1 + description: Some clean text + """; + + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences(yaml, AllowElasticsearch); + act.Should().NotThrow(); + } + + [Fact] + public void ValidateNoPrivateReferences_PrivateUrl_Throws() + { + var yaml = """ + title: Feature + prs: + - https://github.com/elastic/secret-repo/pull/42 + """; + + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences(yaml, AllowElasticsearch); + act.Should().Throw() + .WithMessage("*secret-repo*"); + } + + [Fact] + public void ValidateNoPrivateReferences_PrivateShortForm_Throws() + { + var yaml = """ + description: See elastic/secret-team#99 + """; + + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences(yaml, AllowElasticsearch); + act.Should().Throw() + .WithMessage("*secret-team*"); + } + + [Fact] + public void ValidateNoPrivateReferences_ResidualSentinel_Throws() + { + var yaml = """ + prs: + - "# PRIVATE: https://github.com/elastic/secret/pull/1" + """; + + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences(yaml, AllowElasticsearch); + act.Should().Throw() + .WithMessage("*PRIVATE*"); + } + + [Fact] + public void ValidateNoPrivateReferences_AllowedUrl_DoesNotThrow() + { + var yaml = "See https://github.com/elastic/elasticsearch/pull/100 and elastic/kibana#50"; + + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences(yaml, AllowElasticsearchAndKibana); + act.Should().NotThrow(); + } + + [Fact] + public void ValidateNoPrivateReferences_EmptyYaml_DoesNotThrow() + { + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences("", AllowElasticsearch); + act.Should().NotThrow(); + } + + [Fact] + public void ValidateNoPrivateReferences_MixedAllowedAndPrivate_Throws() + { + var yaml = "Public https://github.com/elastic/elasticsearch/pull/1 and private https://github.com/elastic/secret/issues/2"; + + var act = () => LinkAllowlistSanitizer.ValidateNoPrivateReferences(yaml, AllowElasticsearch); + act.Should().Throw() + .WithMessage("*secret*"); + } + + // --- TryApplyChangelogEntry mixed scenarios --- + + [Fact] + public void TryApplyChangelogEntry_MixedPrs_KeepsAllowedDropsPrivate() + { + var entry = new BundledEntry + { + Title = "Mixed refs", + Prs = [ + "https://github.com/elastic/elasticsearch/pull/1", + "https://github.com/elastic/secret-repo/pull/2", + "https://github.com/elastic/kibana/pull/3" + ] + }; + + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, AllowElasticsearchAndKibana, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeTrue(); + sanitized.Prs.Should().HaveCount(2); + sanitized.Prs.Should().Contain("https://github.com/elastic/elasticsearch/pull/1"); + sanitized.Prs.Should().Contain("https://github.com/elastic/kibana/pull/3"); + sanitized.Prs.Should().NotContain(r => r.Contains("secret-repo")); + } + + [Fact] + public void TryApplyChangelogEntry_PrivateRefsInAllFields_ScrubsEverything() + { + var entry = new BundledEntry + { + Title = "Full scrub test", + Prs = ["https://github.com/elastic/secret/pull/1"], + Issues = ["elastic/secret#2"], + Description = "Caused by https://github.com/elastic/secret/issues/3", + Impact = "See elastic/secret#4", + Action = "Use https://github.com/elastic/secret/pull/5 to migrate" + }; + + var ok = LinkAllowlistSanitizer.TryApplyChangelogEntry( + Collector, entry, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out _); + + ok.Should().BeTrue(); + sanitized.Prs.Should().BeEmpty(); + sanitized.Issues.Should().BeEmpty(); + sanitized.Description.Should().NotContain("secret"); + sanitized.Impact.Should().NotContain("secret"); + sanitized.Action.Should().NotContain("secret"); + } } From 5f67d6ae32da172df8048476cc61cf9f7acf5401 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Wed, 22 Apr 2026 11:47:50 -0300 Subject: [PATCH 6/8] Use domain-type inference for products, and add safety net to ResolveBundleDirectory --- .../Uploading/ChangelogUploadService.cs | 12 ++----- .../Uploading/ChangelogUploadServiceTests.cs | 33 +++++++++++++++---- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs b/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs index 38da7bc566..b0baa6c81f 100644 --- a/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs +++ b/src/services/Elastic.Changelog/Uploading/ChangelogUploadService.cs @@ -164,9 +164,6 @@ private List ReadProductsFromFragment(string filePath) } } - private static readonly YamlDotNet.Serialization.IDeserializer BundleDeserializer = - ReleaseNotesSerialization.GetEntryDeserializer(); - internal IReadOnlyList DiscoverBundleUploadTargets(IDiagnosticsCollector collector, string bundleDir) { var rootDir = _fileSystem.DirectoryInfo.New(bundleDir); @@ -216,14 +213,11 @@ private List ReadProductsFromBundle(string filePath) try { var content = _fileSystem.File.ReadAllText(filePath); - var bundle = BundleDeserializer.Deserialize(content); - if (bundle?.Products == null) - return []; + var bundle = ReleaseNotesSerialization.DeserializeBundle(content); return bundle.Products - .Select(p => p?.Product) + .Select(p => p.ProductId) .Where(p => !string.IsNullOrWhiteSpace(p)) - .Select(p => p!) .Distinct() .ToList(); } @@ -255,6 +249,6 @@ private List ReadProductsFromBundle(string filePath) return "docs/releases"; var config = await _configLoader.LoadChangelogConfiguration(collector, args.Config, ctx); - return config?.Bundle?.OutputDirectory ?? "docs/releases"; + return config?.Bundle?.OutputDirectory ?? config?.Bundle?.Directory ?? "docs/releases"; } } diff --git a/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs b/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs index 9af11f8bda..90528f4b64 100644 --- a/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs +++ b/tests/Elastic.Changelog.Tests/Uploading/ChangelogUploadServiceTests.cs @@ -343,9 +343,16 @@ public async Task Upload_BundleArtifactType_UploadsToS3() - product: elasticsearch target: 9.2.0 lifecycle: ga + repo: elasticsearch + owner: elastic entries: - - title: New feature - type: feature + - file: + name: 1234-feature.yaml + checksum: abc123def456 + type: enhancement + title: New feature + prs: + - https://github.com/elastic/elasticsearch/pull/1234 """)); A.CallTo(() => _s3Client.GetObjectMetadataAsync(A._, A._)) @@ -383,9 +390,16 @@ public void DiscoverBundleUploadTargets_MapsToCorrectS3Key() products: - product: elasticsearch target: 9.2.0 + repo: elasticsearch + owner: elastic entries: - - title: Feature - type: feature + - file: + name: 5678-bugfix.yaml + checksum: def789abc012 + type: bug-fix + title: Fixed crash on startup + prs: + - https://github.com/elastic/elasticsearch/pull/5678 """)); var targets = _service.DiscoverBundleUploadTargets(_collector, bundleDir); @@ -405,11 +419,18 @@ public void DiscoverBundleUploadTargets_MultipleProducts_CreatesTargetPerProduct products: - product: elasticsearch target: 9.2.0 + repo: elasticsearch - product: kibana target: 9.2.0 + repo: kibana entries: - - title: Stack feature - type: feature + - file: + name: 9999-cross-product.yaml + checksum: aaa111bbb222 + type: enhancement + title: Cross-product improvement + prs: + - https://github.com/elastic/elasticsearch/pull/9999 """)); var targets = _service.DiscoverBundleUploadTargets(_collector, bundleDir); From aaea34af478d0c8650b7a84cae635ad1060ef4f1 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Wed, 22 Apr 2026 16:40:41 -0300 Subject: [PATCH 7/8] Simplify scrubbing --- .../docs-lambda-changelog-scrubber/Program.cs | 5 +- .../Bundling/LinkAllowlistSanitizer.cs | 38 ++++-- .../Changelogs/LinkAllowlistSanitizerTests.cs | 114 +++++++++++++----- 3 files changed, 119 insertions(+), 38 deletions(-) diff --git a/src/infra/docs-lambda-changelog-scrubber/Program.cs b/src/infra/docs-lambda-changelog-scrubber/Program.cs index ea9027db00..5da56e5125 100644 --- a/src/infra/docs-lambda-changelog-scrubber/Program.cs +++ b/src/infra/docs-lambda-changelog-scrubber/Program.cs @@ -180,8 +180,8 @@ async Task ScrubBundle(string content, ILambdaContext context) var repo = bundle.Products.Count > 0 ? bundle.Products[0].Repo : null; await using var collector = new DiagnosticsCollector([]); - if (!LinkAllowlistSanitizer.TryApplyBundle(collector, bundle, allowRepos, owner, repo, out var sanitized, out var changed)) - throw new InvalidOperationException($"Failed to apply allowlist to bundle; errors: {collector.Errors}"); + if (!LinkAllowlistSanitizer.ScrubBundleForPublic(collector, bundle, allowRepos, owner, repo, out var sanitized, out var changed)) + throw new InvalidOperationException($"Failed to scrub bundle for public output; errors: {collector.Errors}"); if (!changed) { @@ -190,7 +190,6 @@ async Task ScrubBundle(string content, ILambdaContext context) return content; } - sanitized = LinkAllowlistSanitizer.StripBundleSentinels(sanitized); var result = ReleaseNotesSerialization.SerializeBundle(sanitized); LinkAllowlistSanitizer.ValidateNoPrivateReferences(result, allowRepos); return result; diff --git a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs index 33bb580dc4..46e7b54887 100644 --- a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs +++ b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs @@ -238,21 +238,43 @@ public static bool TryApplyChangelogEntry( } /// - /// Strips # PRIVATE: sentinel entries from all entries in a bundle. - /// Use after when the output is destined for a public bucket. + /// Scrubs a bundle for public output by directly removing disallowed references. + /// Unlike (which produces # PRIVATE: sentinels for private-side tooling), + /// this method never creates sentinels. Disallowed PR/issue entries are dropped and text fields are scrubbed. /// - public static Bundle StripBundleSentinels(Bundle bundle) + public static bool ScrubBundleForPublic( + IDiagnosticsCollector collector, + Bundle bundle, + IReadOnlyList allowRepos, + string defaultOwner, + string? defaultBundleRepo, + out Bundle sanitized, + out bool changesApplied) { + sanitized = bundle; + changesApplied = false; + + var anyChanged = false; var newEntries = new List(bundle.Entries.Count); + foreach (var entry in bundle.Entries) { - var dummy = false; - var prs = DropSentinels(entry.Prs, ref dummy); - var issues = DropSentinels(entry.Issues, ref dummy); - newEntries.Add(entry with { Prs = prs, Issues = issues }); + if (!TryApplyChangelogEntry(collector, entry, allowRepos, defaultOwner, defaultBundleRepo, + out var scrubbed, out var entryChanged)) + return false; + + if (entryChanged) + anyChanged = true; + + newEntries.Add(scrubbed); } - return bundle with { Entries = newEntries }; + var allow = BuildAllowSet(allowRepos); + var description = ScrubText(bundle.Description, allow, ref anyChanged); + + sanitized = bundle with { Entries = newEntries, Description = description }; + changesApplied = anyChanged; + return true; } /// diff --git a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs index 0e7eac94f6..851a05472e 100644 --- a/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs +++ b/tests/Elastic.Changelog.Tests/Changelogs/LinkAllowlistSanitizerTests.cs @@ -686,10 +686,10 @@ public void ScrubText_IssueUrl_ReplacesPrivate() result.Should().NotContain("secret-repo"); } - // --- StripBundleSentinels --- + // --- ScrubBundleForPublic --- [Fact] - public void StripBundleSentinels_RemovesSentinelsFromPrsAndIssues() + public void ScrubBundleForPublic_DropsPrivateRefsDirectly() { var bundle = new Bundle { @@ -697,23 +697,29 @@ public void StripBundleSentinels_RemovesSentinelsFromPrsAndIssues() [ new() { - Title = "Entry with sentinels", - Prs = ["https://github.com/elastic/elasticsearch/pull/1", "# PRIVATE: https://github.com/elastic/secret/pull/2"], - Issues = ["# PRIVATE: elastic/secret#3", "https://github.com/elastic/elasticsearch/issues/4"] + Title = "Mixed refs", + Prs = ["https://github.com/elastic/elasticsearch/pull/1", "https://github.com/elastic/secret/pull/2"], + Issues = ["elastic/secret#3", "https://github.com/elastic/elasticsearch/issues/4"], + Description = "See elastic/secret#5 for context" } ] }; - var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + var ok = LinkAllowlistSanitizer.ScrubBundleForPublic( + Collector, bundle, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out var changed); - result.Entries[0].Prs.Should().HaveCount(1); - result.Entries[0].Prs![0].Should().Be("https://github.com/elastic/elasticsearch/pull/1"); - result.Entries[0].Issues.Should().HaveCount(1); - result.Entries[0].Issues![0].Should().Be("https://github.com/elastic/elasticsearch/issues/4"); + ok.Should().BeTrue(); + changed.Should().BeTrue(); + sanitized.Entries[0].Prs.Should().HaveCount(1); + sanitized.Entries[0].Prs![0].Should().Be("https://github.com/elastic/elasticsearch/pull/1"); + sanitized.Entries[0].Issues.Should().HaveCount(1); + sanitized.Entries[0].Issues![0].Should().Be("https://github.com/elastic/elasticsearch/issues/4"); + sanitized.Entries[0].Description.Should().NotContain("secret"); } [Fact] - public void StripBundleSentinels_AllSentinels_ReturnsEmptyLists() + public void ScrubBundleForPublic_AllPrivate_ReturnsEmptyLists() { var bundle = new Bundle { @@ -722,49 +728,103 @@ public void StripBundleSentinels_AllSentinels_ReturnsEmptyLists() new() { Title = "All private", - Prs = ["# PRIVATE: https://github.com/elastic/secret/pull/1"], - Issues = ["# PRIVATE: elastic/secret#2"] + Prs = ["https://github.com/elastic/secret/pull/1"], + Issues = ["elastic/secret#2"] } ] }; - var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + var ok = LinkAllowlistSanitizer.ScrubBundleForPublic( + Collector, bundle, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out _); - result.Entries[0].Prs.Should().BeEmpty(); - result.Entries[0].Issues.Should().BeEmpty(); + ok.Should().BeTrue(); + sanitized.Entries[0].Prs.Should().BeEmpty(); + sanitized.Entries[0].Issues.Should().BeEmpty(); } [Fact] - public void StripBundleSentinels_NullLists_PreservesNull() + public void ScrubBundleForPublic_NullLists_PreservesNull() { var bundle = new Bundle { Entries = [new() { Title = "No refs", Prs = null, Issues = null }] }; - var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + var ok = LinkAllowlistSanitizer.ScrubBundleForPublic( + Collector, bundle, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out var changed); - result.Entries[0].Prs.Should().BeNull(); - result.Entries[0].Issues.Should().BeNull(); + ok.Should().BeTrue(); + changed.Should().BeFalse(); + sanitized.Entries[0].Prs.Should().BeNull(); + sanitized.Entries[0].Issues.Should().BeNull(); } [Fact] - public void StripBundleSentinels_MultipleEntries_StripsAll() + public void ScrubBundleForPublic_MultipleEntries_ScrubsAll() { var bundle = new Bundle { Entries = [ - new() { Title = "A", Prs = ["# PRIVATE: x"] }, - new() { Title = "B", Issues = ["# PRIVATE: y", "elastic/elasticsearch#1"] } + new() { Title = "A", Prs = ["https://github.com/elastic/secret/pull/1"] }, + new() { Title = "B", Issues = ["elastic/secret#2", "elastic/elasticsearch#3"] } ] }; - var result = LinkAllowlistSanitizer.StripBundleSentinels(bundle); + var ok = LinkAllowlistSanitizer.ScrubBundleForPublic( + Collector, bundle, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out _); + + ok.Should().BeTrue(); + sanitized.Entries[0].Prs.Should().BeEmpty(); + sanitized.Entries[1].Issues.Should().HaveCount(1); + sanitized.Entries[1].Issues![0].Should().Be("elastic/elasticsearch#3"); + } - result.Entries[0].Prs.Should().BeEmpty(); - result.Entries[1].Issues.Should().HaveCount(1); - result.Entries[1].Issues![0].Should().Be("elastic/elasticsearch#1"); + [Fact] + public void ScrubBundleForPublic_NeverProducesSentinels() + { + var bundle = new Bundle + { + Entries = + [ + new() + { + Title = "Entry", + Prs = ["https://github.com/elastic/secret/pull/1"], + Description = "See elastic/secret#2" + } + ] + }; + + var ok = LinkAllowlistSanitizer.ScrubBundleForPublic( + Collector, bundle, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out _); + + ok.Should().BeTrue(); + sanitized.Entries[0].Prs.Should().NotContain(r => r.Contains("# PRIVATE:")); + sanitized.Entries[0].Description.Should().NotContain("# PRIVATE:"); + sanitized.Entries[0].Description.Should().NotContain("secret"); + } + + [Fact] + public void ScrubBundleForPublic_ScrubsBundleDescription() + { + var bundle = new Bundle + { + Description = "Release notes referencing elastic/secret#42", + Entries = [new() { Title = "Entry" }] + }; + + var ok = LinkAllowlistSanitizer.ScrubBundleForPublic( + Collector, bundle, AllowElasticsearch, "elastic", "elasticsearch", + out var sanitized, out var changed); + + ok.Should().BeTrue(); + changed.Should().BeTrue(); + sanitized.Description.Should().NotContain("secret"); } // --- ValidateNoPrivateReferences --- From dc385976235b2b71f15c466e06a391eac939afc3 Mon Sep 17 00:00:00 2001 From: Felipe Cotti Date: Wed, 22 Apr 2026 16:51:23 -0300 Subject: [PATCH 8/8] Don't go through sentinel checks --- .../Bundling/LinkAllowlistSanitizer.cs | 74 ++++++++++++++++--- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs index 46e7b54887..611b04c75f 100644 --- a/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs +++ b/src/services/Elastic.Changelog/Bundling/LinkAllowlistSanitizer.cs @@ -156,7 +156,7 @@ public static bool TryApplyChangelogEntry( var ownerDefault = string.IsNullOrWhiteSpace(defaultOwner) ? "elastic" : defaultOwner; var anyRewritten = false; - var prs = ApplyToReferenceList( + var prs = FilterReferenceList( collector, entry.Prs, ownerDefault, @@ -166,9 +166,8 @@ public static bool TryApplyChangelogEntry( ref anyRewritten); if (prs == null && entry.Prs is not null) return false; - prs = DropSentinels(prs, ref anyRewritten); - var issues = ApplyToReferenceList( + var issues = FilterReferenceList( collector, entry.Issues, ownerDefault, @@ -178,7 +177,6 @@ public static bool TryApplyChangelogEntry( ref anyRewritten); if (issues == null && entry.Issues is not null) return false; - issues = DropSentinels(issues, ref anyRewritten); var description = ScrubText(entry.Description, allow, ref anyRewritten); var impact = ScrubText(entry.Impact, allow, ref anyRewritten); @@ -322,19 +320,73 @@ public static void ValidateNoPrivateReferences(string serializedYaml, IReadOnlyL return ScrubText(input, allow, ref changed); } - private static IReadOnlyList? DropSentinels(IReadOnlyList? refs, ref bool changed) + private static IReadOnlyList? FilterReferenceList( + IDiagnosticsCollector collector, + IReadOnlyList? refs, + string defaultOwner, + string? defaultBundleRepo, + HashSet allow, + string referenceKind, + ref bool anyDropped) { if (refs is null) return null; - var filtered = refs - .Where(r => !r.StartsWith(SentinelPrefix, StringComparison.OrdinalIgnoreCase)) - .ToList(); + if (refs.Count == 0) + return refs; - if (filtered.Count != refs.Count) - changed = true; + var list = new List(refs.Count); + foreach (var r in refs) + { + if (string.IsNullOrWhiteSpace(r)) + { + list.Add(r); + continue; + } + + if (r.StartsWith(SentinelPrefix, StringComparison.OrdinalIgnoreCase)) + { + var underlyingRef = r.Substring(SentinelPrefix.Length).Trim(); + if (string.IsNullOrWhiteSpace(underlyingRef)) + continue; - return filtered; + if (!ChangelogTextUtilities.TryGetGitHubRepo(underlyingRef, defaultOwner, defaultBundleRepo ?? string.Empty, out var sOwner, out var sRepo)) + continue; + + if (allow.Contains($"{sOwner}/{sRepo}")) + { + list.Add(underlyingRef); + anyDropped = true; + } + else + anyDropped = true; + + continue; + } + + if (!ChangelogTextUtilities.TryGetGitHubRepo(r, defaultOwner, defaultBundleRepo ?? string.Empty, out var owner, out var repo)) + { + collector.EmitError( + string.Empty, + $"Link allowlist filtering could not parse {referenceKind} reference '{r}'. " + + "Use a full https://github.com/ URL, owner/repo#number, or a bare number with bundle owner/repo set."); + return null; + } + + if (allow.Contains($"{owner}/{repo}")) + { + list.Add(r); + } + else + { + anyDropped = true; + collector.EmitWarning( + string.Empty, + $"PR/issue reference '{r}' targets repository '{owner}/{repo}', which is not in the allowlist. It was removed from public output."); + } + } + + return list; } private static HashSet BuildAllowSet(IReadOnlyList allowRepos)