From 765a664d04ede7d03d44b8f211c786fc568d4cd2 Mon Sep 17 00:00:00 2001 From: Matt Mitchell Date: Wed, 17 Jun 2026 11:55:41 -0700 Subject: [PATCH 1/2] Make scheduled outerloop builds succeed when only Helix tests fail (#129049) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit > [!NOTE] > This pull request was authored with the assistance of GitHub Copilot. Several scheduled outerloop pipelines (the `outerloop.yml` family: `runtime-libraries-coreclr outerloop` and its `-windows`/`-linux`/`-osx` variants) use an `always: false` scheduled trigger. With `always: false`, AzDO only starts a new scheduled run if the source changed **since the last _successful_ scheduled run**. Because the repo has many flaky outerloop tests, the Helix test work items virtually always have at least one failure, which fails the "Send to Helix" step and therefore the whole build. The build never reaches a `succeeded` state, so AzDO re-queues **the same, unchanged commit** day after day, submitting more and more Helix work for no benefit. (Empirically confirmed: a single commit was re-run and failed for 19 consecutive days; once a sibling definition produced a genuinely successful run, the same-SHA re-queue stopped.) `continueOnError: true` only downgrades the build to `partiallySucceeded`, which AzDO's `always: false` scheduler still does **not** treat as successful — so the same commit keeps getting re-queued. The Helix step must end **fully successful** (exit 0). Make the "Send to Helix" step actually succeed on scheduled runs by disabling the two Arcade `Microsoft.DotNet.Helix.Sdk` properties that fail the build (both default to `true`): - **`FailOnWorkItemFailure`** — `CheckHelixJobStatus` errors when a work item exits non-zero. - **`FailOnTestFailure`** — `CheckAzurePipelinesTestResults` errors when any published test failed. Setting both to `false` lets the msbuild step exit 0, producing a fully `succeeded` build. 
Failed tests are still published and visible in the test results tab; AzDO does not auto-degrade a build to `partiallySucceeded` just because a published test run contains failures — only a failing task would. - **`eng/pipelines/libraries/helix.yml`**: Added a `failOnTestFailures` parameter (default `true`, preserving today's behavior) wired to `/p:FailOnWorkItemFailure` and `/p:FailOnTestFailure` on the Send to Helix msbuild invocation. - **`eng/pipelines/libraries/outerloop.yml`**: Passes `failOnTestFailures: false` **only on scheduled runs** (`Build.Reason == 'Schedule'`) for all three matrix legs (Release, Debug, NET48). The new parameter defaults to `true`, so all other `helix.yml` callers are unaffected (none set `WaitForWorkItemCompletion` or these properties on this path, so they already resolve to `true`). Only scheduled outerloop runs change behavior. PR / rolling / manual outerloop runs continue to fail on Helix failures exactly as before. Build/compile breaks still fail scheduled runs (this only affects the Helix step). On scheduled runs, `FailOnWorkItemFailure=false` also masks work-item crashes/timeouts/infra failures, not just test-assertion failures. This is an accepted tradeoff for the goal of stopping the wasteful daily re-queue of unchanged commits; results remain visible in the Helix/test reporting. --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/pipelines/libraries/helix.yml | 7 +++++++ eng/pipelines/libraries/outerloop.yml | 11 +++++++++++ 2 files changed, 18 insertions(+) diff --git a/eng/pipelines/libraries/helix.yml b/eng/pipelines/libraries/helix.yml index 03b3cf1ebaffca..a64b4a9b3c0605 100644 --- a/eng/pipelines/libraries/helix.yml +++ b/eng/pipelines/libraries/helix.yml @@ -16,6 +16,11 @@ parameters: SuperPmiCollect: '' SuperPmiCollectionType: '' SuperPmiCollectionName: '' + # When false, Helix work item and test failures do not fail the build, so the "Send to Helix" + # step still succeeds. 
Unlike shouldContinueOnError (which only marks the build as + # partiallySucceeded), this produces a fully successful build. Scheduled builds with + # always:false set this so that flaky tests don't cause AzDO to re-queue the same commit daily. + failOnTestFailures: true steps: - script: $(_msbuildCommand) $(_warnAsErrorParamHelixOverride) -restore @@ -29,6 +34,8 @@ steps: /p:TestScope=${{ parameters.testScope }} /p:TestRunNamePrefixSuffix=${{ parameters.testRunNamePrefixSuffix }} /p:HelixBuild=$(Build.BuildNumber) + /p:FailOnWorkItemFailure=${{ parameters.failOnTestFailures }} + /p:FailOnTestFailure=${{ parameters.failOnTestFailures }} ${{ parameters.extraHelixArguments }} /bl:$(Build.SourcesDirectory)/artifacts/log/$(_BuildConfig)/SendToHelix.binlog displayName: Send to Helix diff --git a/eng/pipelines/libraries/outerloop.yml b/eng/pipelines/libraries/outerloop.yml index afc38926ea35eb..d0b85528bd1bf5 100644 --- a/eng/pipelines/libraries/outerloop.yml +++ b/eng/pipelines/libraries/outerloop.yml @@ -52,6 +52,11 @@ extends: testScope: outerloop creator: dotnet-bot testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) + # On scheduled runs (always:false) don't fail the build on Helix work item or + # test failures, so flaky outerloop tests don't keep AzDO re-queueing the same + # commit. The Send to Helix step fully succeeds (not partiallySucceeded). + ${{ if eq(variables['Build.Reason'], 'Schedule') }}: + failOnTestFailures: false - ${{ if eq(variables['isRollingBuild'], false) }}: - template: /eng/pipelines/common/platform-matrix.yml @@ -81,6 +86,9 @@ extends: testScope: outerloop creator: dotnet-bot testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) + # Don't fail scheduled builds on Helix work item/test failures (see above). 
+ ${{ if eq(variables['Build.Reason'], 'Schedule') }}: + failOnTestFailures: false - ${{ if eq(variables['includeWindowsOuterloop'], true) }}: - template: /eng/pipelines/common/platform-matrix.yml @@ -106,3 +114,6 @@ extends: testScope: outerloop creator: dotnet-bot extraHelixArguments: /p:BuildTargetFramework=net481 + # Don't fail scheduled builds on Helix work item/test failures (see above). + ${{ if eq(variables['Build.Reason'], 'Schedule') }}: + failOnTestFailures: false From b1f14a0c7a9961740cd8176fb5c684c759136147 Mon Sep 17 00:00:00 2001 From: Matt Mitchell Date: Fri, 26 Jun 2026 13:04:20 -0700 Subject: [PATCH 2/2] Surface scheduled outerloop Helix work item failures as warnings (#129629) PR #129049 made scheduled outerloop builds succeed when only Helix tests fail, by setting `FailOnWorkItemFailure`/`FailOnTestFailure` to `false` on scheduled runs (via the `failOnTestFailures: false` parameter). This stopped AzDO's `always: false` scheduler from re-queueing the same commit day after day. The side effect: failed Helix work items became **completely invisible** in the Azure DevOps timeline. The `Send to Helix` step is fully green, so there is no signal that work items failed (even though, for flaky outerloop, they almost always do). Surface failed work items as **warnings** instead of silently dropping them. Warnings keep the failures visible in the timeline but do **not** degrade the build below `succeeded` (so the `always: false` re-queue fix from #129049 is preserved). - **`src/libraries/sendtohelixhelp.proj`**: new `WarnOnHelixWorkItemFailure` target (`AfterTargets=CheckHelixJobStatus`) that emits a `<Warning>` for each failed `@(CompletedWorkItem)` when `WarnOnHelixTestFailure=true`. This mirrors what the Arcade SDK's `CheckHelixJobStatus` would have *errored* on, but as a warning. - **`eng/pipelines/libraries/helix.yml`**: new `warnOnTestFailures` parameter (default `false`) wired to `/p:WarnOnHelixTestFailure`. 
- **`eng/pipelines/libraries/outerloop.yml`**: scheduled runs now set `warnOnTestFailures: true` alongside `failOnTestFailures: false` on all three legs. No warn-as-error change was needed: the `Send to Helix` step already runs with warnaserror disabled (`_warnAsErrorParamHelixOverride`), so these warnings are not promoted back into build-failing errors. Ran the `runtime-libraries-coreclr outerloop` pipeline (dnceng-public def 125, [build 1472840](https://dev.azure.com/dnceng-public/public/_build/results?buildId=1472840)) with a temporary Manual gate. Multiple CoreCLR_Release legs completed **succeeded** with failed work items surfaced as warnings and **zero errors**, e.g.: ``` src/libraries/sendtohelixhelp.proj(364,5): warning : Work item System.Runtime.Numerics.Tests in job 2e01f1b1-... has failed. Failure log: https://helix.dot.net/api/.../console ``` Legs whose work items all passed produced no such warning, as expected. > [!NOTE] > This pull request was authored with the assistance of GitHub Copilot. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- eng/pipelines/libraries/helix.yml | 5 +++++ eng/pipelines/libraries/outerloop.yml | 12 +++++++++--- src/libraries/sendtohelixhelp.proj | 16 ++++++++++++++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/eng/pipelines/libraries/helix.yml b/eng/pipelines/libraries/helix.yml index a64b4a9b3c0605..6733be837055a4 100644 --- a/eng/pipelines/libraries/helix.yml +++ b/eng/pipelines/libraries/helix.yml @@ -21,6 +21,10 @@ parameters: # partiallySucceeded), this produces a fully successful build. Scheduled builds with # always:false set this so that flaky tests don't cause AzDO to re-queue the same commit daily. failOnTestFailures: true + # When true, failed Helix work items are surfaced as build warnings (visible in the AzDO + # timeline) instead of being silently ignored. Intended to be paired with failOnTestFailures: + # false so that failures stay visible without failing the build. 
+ warnOnTestFailures: false steps: - script: $(_msbuildCommand) $(_warnAsErrorParamHelixOverride) -restore @@ -36,6 +40,7 @@ steps: /p:HelixBuild=$(Build.BuildNumber) /p:FailOnWorkItemFailure=${{ parameters.failOnTestFailures }} /p:FailOnTestFailure=${{ parameters.failOnTestFailures }} + /p:WarnOnHelixTestFailure=${{ parameters.warnOnTestFailures }} ${{ parameters.extraHelixArguments }} /bl:$(Build.SourcesDirectory)/artifacts/log/$(_BuildConfig)/SendToHelix.binlog displayName: Send to Helix diff --git a/eng/pipelines/libraries/outerloop.yml b/eng/pipelines/libraries/outerloop.yml index d0b85528bd1bf5..1d21af5a46db72 100644 --- a/eng/pipelines/libraries/outerloop.yml +++ b/eng/pipelines/libraries/outerloop.yml @@ -54,9 +54,11 @@ extends: testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) # On scheduled runs (always:false) don't fail the build on Helix work item or # test failures, so flaky outerloop tests don't keep AzDO re-queueing the same - # commit. The Send to Helix step fully succeeds (not partiallySucceeded). + # commit. The Send to Helix step fully succeeds (not partiallySucceeded). Failed + # work items are still surfaced as warnings in the timeline (warnOnTestFailures). ${{ if eq(variables['Build.Reason'], 'Schedule') }}: failOnTestFailures: false + warnOnTestFailures: true - ${{ if eq(variables['isRollingBuild'], false) }}: - template: /eng/pipelines/common/platform-matrix.yml @@ -86,9 +88,11 @@ extends: testScope: outerloop creator: dotnet-bot testRunNamePrefixSuffix: CoreCLR_$(_BuildConfig) - # Don't fail scheduled builds on Helix work item/test failures (see above). + # Don't fail scheduled builds on Helix work item/test failures; surface them + # as timeline warnings instead (see above). 
${{ if eq(variables['Build.Reason'], 'Schedule') }}: failOnTestFailures: false + warnOnTestFailures: true - ${{ if eq(variables['includeWindowsOuterloop'], true) }}: - template: /eng/pipelines/common/platform-matrix.yml @@ -114,6 +118,8 @@ extends: testScope: outerloop creator: dotnet-bot extraHelixArguments: /p:BuildTargetFramework=net481 - # Don't fail scheduled builds on Helix work item/test failures (see above). + # Don't fail scheduled builds on Helix work item/test failures; surface them + # as timeline warnings instead (see above). ${{ if eq(variables['Build.Reason'], 'Schedule') }}: failOnTestFailures: false + warnOnTestFailures: true diff --git a/src/libraries/sendtohelixhelp.proj b/src/libraries/sendtohelixhelp.proj index 29ed70e4ff171a..c2d0b3be0cff58 100644 --- a/src/libraries/sendtohelixhelp.proj +++ b/src/libraries/sendtohelixhelp.proj @@ -384,4 +384,20 @@ DestinationFiles="@(_FilesToStage -> '$(HelixDependenciesStagingPath)\%(DirName)\%(RecursiveDir)%(FileName)%(Extension)')" SkipUnchangedFiles="true" /> + + + + +