From 78fb729ba98dd4eb4581467648de081fe5eacb50 Mon Sep 17 00:00:00 2001 From: Sergey Tepliakov Date: Wed, 20 May 2026 11:08:19 -0700 Subject: [PATCH 1/3] Bump .NET SDK to 10.0.204 --- src/global.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global.json b/src/global.json index c89ccd4..3be15b3 100644 --- a/src/global.json +++ b/src/global.json @@ -1,6 +1,6 @@ { "sdk": { - "version": "9.0.300", + "version": "10.0.204", "rollForward": "latestFeature" } } From ac491c479b8af725eeeb2544cb8bc12ecf5b6d04 Mon Sep 17 00:00:00 2001 From: Sergey Tepliakov Date: Wed, 20 May 2026 11:08:54 -0700 Subject: [PATCH 2/3] Add EPC38: TaskEnumerableReEnumerationAnalyzer Detects re-enumeration of IEnumerable / IEnumerable>, which silently restarts the task producer and creates a new unrelated set of tasks. Includes shared EnumerationMethods helper used by future enumeration analyzers. --- docs/Rules/EPC38.md | 104 +++++ .../AsyncStuff/TaskReEnumeration.cs | 58 +++ ...askEnumerableReEnumerationAnalyzerTests.cs | 283 ++++++++++++ .../AnalyzerReleases.Unshipped.md | 1 + .../TaskEnumerableReEnumerationAnalyzer.cs | 411 ++++++++++++++++++ .../DiagnosticDescriptors.cs | 11 + .../EnumerationMethods.cs | 376 ++++++++++++++++ 7 files changed, 1244 insertions(+) create mode 100644 docs/Rules/EPC38.md create mode 100644 samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs diff --git a/docs/Rules/EPC38.md b/docs/Rules/EPC38.md new file mode 100644 index 0000000..b55bd25 --- /dev/null +++ b/docs/Rules/EPC38.md @@ -0,0 +1,104 @@ +# EPC38 - Do not re-enumerate `IEnumerable` + +Detects when a deferred `IEnumerable` / `IEnumerable>` is observed (enumerated) more than once +inside the same method body. + +## Description + +The standard way to "fan out" async work in .NET is to build a sequence of tasks from some input and then +wait for all of them: + +```csharp +var tasks = ids.Select(async id => await DoWorkAsync(id)); +await Task.WhenAll(tasks); +``` + +This *looks* like it produces a stable collection, but `Select` returns a **deferred** `IEnumerable>`: +every enumeration restarts the projection and creates a fresh set of tasks. So this seemingly innocent code: + +```csharp +var tasks = ids.Select(async id => await DoWorkAsync(id)); +await Task.WhenAll(tasks); +foreach (var t in tasks) // ❌ EPC38 +{ + Process(await t); +} +``` + +…does **not** process the results of the tasks we awaited. It starts a brand-new set of `DoWorkAsync` calls, +awaits each one again, and processes those. The work is duplicated, side effects fire twice, and timing- +sensitive behavior breaks in confusing ways. + +Materializing the sequence up front fixes the bug: + +```csharp +var tasks = ids.Select(async id => await DoWorkAsync(id)).ToArray(); +await Task.WhenAll(tasks); +foreach (var t in tasks) // ✅ tasks is Task[] — safe to re-enumerate +{ + Process(await t); +} +``` + +The analyzer flags **any** symbol whose static type is `IEnumerable`, `IEnumerable>`, +`IEnumerable`, or `IEnumerable>` that is enumerated more than once inside the same +method body. Both observations matter — the rule does not require the first one to be `Task.WhenAll`. + +## Code that triggers the analyzer + +```csharp +async Task FooAsync(IEnumerable ids) +{ + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + foreach (var t in tasks) { await t; } // ❌ EPC38 +} + +void Counts(IEnumerable> tasks) +{ + var count = tasks.Count(); + var sum = tasks.Sum(t => t.Result); // ❌ EPC38 +} + +async Task ManualAsync(IEnumerable tasks) +{ + foreach (var t in tasks) { await t; } + foreach (var t in tasks) { _ = t.Status; } // ❌ EPC38 (strict superset case) +} +``` + +## How to fix + +Materialize the sequence once and use the concrete collection from then on: + +```csharp +async Task FooAsync(IEnumerable ids) +{ + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }).ToArray(); + await Task.WhenAll(tasks); + foreach (var t in tasks) { await t; } // ✅ Task[] — safe +} +``` + +`ToList()`, `ToArray()`, and similar materializing operators all work. If the source is a parameter and you +control the API, prefer to take `IReadOnlyList>` or `Task[]` instead of `IEnumerable>`, +which both communicates intent and makes the rule fall silent. + +## When NOT to suppress + +In almost every case this is a real bug. Suppressing it usually means you've convinced yourself the producer +is *not* deferred — in which case the right fix is to change the static type of the variable to the +materialized type (`IReadOnlyList>`, `Task[]`, etc.) so future readers don't have to make the +same leap. + +## Limitations (v1) + +- Analysis is scoped to a single method body. The rule does not track tasks passed across method boundaries. +- Field and property accesses are not tracked. +- Lambdas / local functions are analyzed as independent scopes. +- Re-assignment (`tasks = freshSource;`) resets tracking from that point onward. + +## See also + +- [CA1851: Possible multiple enumerations of `IEnumerable` collection](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) — the general-purpose rule. EPC38 specifically targets the task-producing case where re-enumeration is almost always a bug, not just a performance issue. +- [EPC39](EPC39.md) — quadratic enumeration inside a loop. diff --git a/samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs b/samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs new file mode 100644 index 0000000..b0890d4 --- /dev/null +++ b/samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs @@ -0,0 +1,58 @@ +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; + +namespace ErrorProne.Samples.AsyncStuff +{ + public class TaskReEnumeration + { + // Canonical bug: WhenAll consumes the deferred IEnumerable>; the subsequent foreach + // re-runs Select() and starts a NEW set of tasks that are completely unrelated to the ones we awaited. + public async Task ComputeTotalAsync(IEnumerable ids) + { + var tasks = ids.Select(async id => + { + await Task.Delay(1); + return id * 2; + }); + + await Task.WhenAll(tasks); + + var total = 0; + foreach (var t in tasks) // ❌ EPC38 — second enumeration starts new tasks + { + total += await t; + } + + return total; + } + + // Two LINQ aggregations on the same deferred enumerable: also a re-enumeration bug. + public int CountAndSum(IEnumerable> tasks) + { + var count = tasks.Count(); + var sum = tasks.Sum(t => t.Result); // ❌ EPC38 + return count + sum; + } + + // Fix: materialize the deferred enumerable first so re-iteration is safe. + public async Task ComputeTotalFixedAsync(IEnumerable ids) + { + var tasks = ids.Select(async id => + { + await Task.Delay(1); + return id * 2; + }).ToArray(); + + await Task.WhenAll(tasks); + + var total = 0; + foreach (var t in tasks) // ✅ safe — `tasks` is Task[] + { + total += await t; + } + + return total; + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs b/src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs new file mode 100644 index 0000000..a64ce97 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs @@ -0,0 +1,283 @@ +using NUnit.Framework; +using System.Threading.Tasks; +using Verify = ErrorProne.NET.TestHelpers.CSharpCodeFixVerifier< + ErrorProne.NET.AsyncAnalyzers.TaskEnumerableReEnumerationAnalyzer, + Microsoft.CodeAnalysis.Testing.EmptyCodeFixProvider>; + +namespace ErrorProne.NET.CoreAnalyzers.Tests.AsyncAnalyzers +{ + [TestFixture] + public class TaskEnumerableReEnumerationAnalyzerTests + { + [Test] + public async Task WarnsOnWhenAllThenForeach() + { + // Canonical bug: WhenAll consumes once, then foreach re-runs the iterator. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + foreach (var t in [|tasks|]) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnDoubleLinqAggregation() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + void Foo(IEnumerable> tasks) { + var first = tasks.Count(); + var second = [|tasks|].Count(); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_SingleEnumeration_WithWhenAll() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_SingleEnumeration_Foreach() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_TaskArray_DoubleEnumeration() + { + // Task[] is materialized — re-enumeration is safe. + var code = @" +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(Task[] tasks) { + await Task.WhenAll(tasks); + foreach (var t in tasks) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_ListOfTask_DoubleEnumeration() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(List tasks) { + await Task.WhenAll(tasks); + foreach (var t in tasks) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_IReadOnlyListOfTask_DoubleEnumeration() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IReadOnlyList tasks) { + await Task.WhenAll(tasks); + foreach (var t in tasks) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_MaterializedViaToArray() + { + // After ToArray, the local has type Task[], so re-enumeration is safe. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }).ToArray(); + await Task.WhenAll(tasks); + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnWhenAnyThenForeach() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAny(tasks); + foreach (var t in [|tasks|]) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnAlias() + { + // var y = tasks introduces an alias; enumerating both should still warn. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable> tasks) { + var y = tasks; + await Task.WhenAll(y); + foreach (var t in [|tasks|]) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_ReassignmentResetsEpoch() + { + // After re-assignment the second enumeration is on a fresh source. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_ManualForeachOnly() + { + // A single manual foreach (no WhenAll) over an IEnumerable is fine. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnDoubleAwaitForeach() + { + // Two manual await-foreaches on the same IEnumerable — strict-superset case. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + foreach (var t in tasks) { await t; } + foreach (var t in [|tasks|]) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnValueTaskEnumerable() + { + // IEnumerable exhibits the same hazard. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); }); + foreach (var t in tasks) { await t; } + foreach (var t in [|tasks|]) { _ = t.IsCompleted; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_NestedLambdaIndependentScope() + { + // The lambda body has its own block analysis; re-using the parameter inside the lambda once + // does NOT combine with the outer enumeration to form a 2nd site. + var code = @" +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + Action a = () => { foreach (var t in tasks) { } }; + a(); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnLinqAggregationAfterWhenAll() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable> tasks) { + await Task.WhenAll(tasks); + return [|tasks|].Sum(t => t.Result); + } +}"; + await Verify.VerifyAsync(code); + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md b/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md index e6c007f..6fb3c8a 100644 --- a/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md +++ b/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md @@ -29,6 +29,7 @@ EPC34 | ErrorHandling | Warning | MustUseResultAnalyzer EPC35 | Async | Info | DoNotBlockUnnecessarilyInAsyncMethodsAnalyzer EPC36 | Async | Info | DoNotUseAsyncDelegatesForLongRunningTasksAnalyzer EPC37 | Async | Info | DoNotValidateArgumentsInAsyncMethodsAnalyzer +EPC38 | Async | Warning | TaskEnumerableReEnumerationAnalyzer ERP021 | ErrorHandling | Warning | ThrowExAnalyzer ERP022 | ErrorHandling | Warning | SwallowAllExceptionsAnalyzer ERP031 | Concurrency | Warning | ConcurrentCollectionAnalyzer diff --git a/src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs b/src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs new file mode 100644 index 0000000..8fe6e8c --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs @@ -0,0 +1,411 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System.Collections.Generic; +using System.Collections.Immutable; +using ErrorProne.NET.Core; +using ErrorProne.NET.CoreAnalyzers; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.AsyncAnalyzers +{ + /// + /// EPC38 — detects when a deferred IEnumerable<Task> / IEnumerable<Task<T>> + /// is observed (enumerated) more than once inside the same method body. + /// + /// + /// Every enumeration of such a sequence executes the underlying producer again — e.g. + /// tasks.Select(async x => ...) creates a fresh task per element on each pass. Patterns like + /// await Task.WhenAll(tasks); foreach (var t in tasks) { ... } silently restart the work and + /// observe a different set of tasks the second time, which is almost always a bug. + /// + /// + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class TaskEnumerableReEnumerationAnalyzer : DiagnosticAnalyzerBase + { + /// + public static readonly DiagnosticDescriptor Rule = DiagnosticDescriptors.EPC38; + + /// + public override ImmutableArray SupportedDiagnostics => ImmutableArray.Create(Rule); + + /// + public TaskEnumerableReEnumerationAnalyzer() + : base(Rule) + { + } + + /// + protected override void InitializeCore(AnalysisContext context) + { + context.RegisterOperationBlockAction(AnalyzeOperationBlock); + } + + private static void AnalyzeOperationBlock(OperationBlockAnalysisContext context) + { + // Per-block tracking: for each tracked symbol record its assignment epoch and enumeration sites + // in syntactic order, then report any non-first enumeration within the same epoch. + // + // We treat lambdas / local functions as opaque (their bodies have their own block events). + var trackers = new Dictionary(SymbolEqualityComparer.Default); + + foreach (var block in context.OperationBlocks) + { + CollectSites(block, trackers, context); + } + + ReportSites(trackers, context); + } + + private static void CollectSites( + IOperation root, + Dictionary trackers, + OperationBlockAnalysisContext context) + { + foreach (var op in root.EnumerateChildOperations()) + { + // Don't descend into nested lambdas / local functions — they'll get their own block event. + if (IsInsideNestedFunction(op, root)) + { + continue; + } + + TryRecordAssignment(op, trackers, context.Compilation); + TryRecordEnumeration(op, trackers, context.Compilation); + } + } + + private static bool IsInsideNestedFunction(IOperation op, IOperation root) + { + // An operation that has a different containing local function / lambda than the block root + // is part of a nested function and will be analyzed by its own block-action invocation. + for (var current = op.Parent; current is not null && current != root; current = current.Parent) + { + if (current is IAnonymousFunctionOperation || current is ILocalFunctionOperation) + { + return true; + } + } + + return false; + } + + /// + /// Records that a tracked symbol gained a *new* value, ending its previous "enumeration epoch". + /// Aliasing (var y = x;) is also tracked: if the right-hand side is a reference to another + /// tracked symbol, the two share the same epoch. + /// + private static void TryRecordAssignment( + IOperation op, + Dictionary trackers, + Compilation compilation) + { + switch (op) + { + case IVariableDeclaratorOperation declarator: + { + var symbol = declarator.Symbol; + if (!IsTrackable(symbol.Type, compilation)) + { + return; + } + + var initializer = declarator.Initializer?.Value + ?? (declarator.Parent as IVariableDeclarationOperation)?.Initializer?.Value; + + var tracker = GetOrCreate(trackers, symbol); + var aliasRoot = EnumerationMethods.TryGetRootEnumerableSymbol(initializer, compilation); + if (aliasRoot is not null + && !SymbolEqualityComparer.Default.Equals(aliasRoot, symbol) + && IsTrackable(GetSymbolType(aliasRoot), compilation)) + { + // Right-hand side is itself a trackable enumerable (a parameter or another local). + // Make the two share a tracker so that subsequent enumerations on either name accrue + // toward the same set of sites. + var aliasTarget = GetOrCreate(trackers, aliasRoot); + tracker.AliasTo(aliasTarget); + } + else + { + tracker.NewEpoch(op.Syntax.SpanStart); + } + + return; + } + + case ISimpleAssignmentOperation assignment: + { + var rootSymbol = EnumerationMethods.TryGetRootEnumerableSymbol(assignment.Target, compilation); + if (rootSymbol is null || !IsTrackable(GetSymbolType(rootSymbol), compilation)) + { + return; + } + + var tracker = GetOrCreate(trackers, rootSymbol); + tracker.NewEpoch(op.Syntax.SpanStart); + return; + } + } + } + + /// + /// Records that an enumeration of a tracked symbol happens at the given operation site. + /// + private static void TryRecordEnumeration( + IOperation op, + Dictionary trackers, + Compilation compilation) + { + switch (op) + { + case IForEachLoopOperation foreachLoop: + RecordIfTracked( + foreachLoop.Collection, + trackers, + compilation, + foreachLoop.Collection.Syntax.GetLocation()); + return; + + case IInvocationOperation invocation: + { + var target = invocation.TargetMethod; + + // Task aggregation methods: Task.WhenAll(IEnumerable) and friends. The first + // positional argument is the enumerable being observed. + if (EnumerationMethods.IsTaskAggregationMethod(target, compilation)) + { + if (invocation.Arguments.Length > 0) + { + var argValue = invocation.Arguments[0].Value; + RecordIfTracked(argValue, trackers, compilation, argValue.Syntax.GetLocation()); + } + + return; + } + + // LINQ enumerating methods (Count, ToList, Sum, …) — receiver is the source. + if (EnumerationMethods.IsEnumeratingLinqMethod(target, compilation)) + { + // For instance / reduced-extension calls the source is the receiver. + var source = invocation.Instance ?? (invocation.Arguments.Length > 0 ? invocation.Arguments[0].Value : null); + if (source is not null) + { + RecordIfTracked(source, trackers, compilation, source.Syntax.GetLocation()); + } + + return; + } + + return; + } + } + } + + private static void RecordIfTracked( + IOperation? sourceOperation, + Dictionary trackers, + Compilation compilation, + Location reportLocation) + { + if (sourceOperation is null) + { + return; + } + + var rootSymbol = EnumerationMethods.TryGetRootEnumerableSymbol(sourceOperation, compilation); + if (rootSymbol is null) + { + return; + } + + var symbolType = GetSymbolType(rootSymbol); + if (!IsTrackable(symbolType, compilation)) + { + return; + } + + var tracker = GetOrCreate(trackers, rootSymbol); + tracker.AddEnumeration(sourceOperation.Syntax.SpanStart, reportLocation, rootSymbol.Name); + } + + private static void ReportSites( + Dictionary trackers, + OperationBlockAnalysisContext context) + { + // Each tracker is a separate logical sequence (post-alias resolution we use the *root* of the + // alias chain so multiple aliases collapse into one). We report every enumeration after the first + // in any epoch that has more than one site. + var roots = new HashSet(); + foreach (var tracker in trackers.Values) + { + roots.Add(tracker.Resolve()); + } + + foreach (var root in roots) + { + root.ReportRepeatedSites(context, Rule); + } + } + + private static bool IsTrackable(ITypeSymbol? type, Compilation compilation) + { + return EnumerationMethods.IsTaskEnumerableType(type, compilation); + } + + private static ITypeSymbol? GetSymbolType(ISymbol symbol) + { + return symbol switch + { + ILocalSymbol l => l.Type, + IParameterSymbol p => p.Type, + _ => null, + }; + } + + private static SymbolTracker GetOrCreate(Dictionary trackers, ISymbol symbol) + { + if (!trackers.TryGetValue(symbol, out var tracker)) + { + tracker = new SymbolTracker(symbol); + trackers[symbol] = tracker; + } + + return tracker; + } + + /// + /// Per-symbol enumeration history. Records all assignment positions and all enumeration sites + /// (regardless of visit order), then groups sites into "epochs" delimited by assignment positions + /// at report time. Multiple aliased symbols share a single underlying tracker via + /// / . + /// + private sealed class SymbolTracker + { + private readonly ISymbol _symbol; + private SymbolTracker? _aliasTarget; + + private readonly List _assignmentPositions = new(); + private readonly List _sites = new(); + + public SymbolTracker(ISymbol symbol) => _symbol = symbol; + + public SymbolTracker Resolve() + { + var current = this; + while (current._aliasTarget is not null) + { + current = current._aliasTarget; + } + + return current; + } + + public void AliasTo(SymbolTracker target) + { + var resolvedTarget = target.Resolve(); + if (ReferenceEquals(resolvedTarget, this)) + { + return; + } + + // Migrate any sites/assignments that were recorded on this tracker (e.g. discovered before + // the alias relationship was established) onto the target tracker. + resolvedTarget._sites.AddRange(_sites); + resolvedTarget._assignmentPositions.AddRange(_assignmentPositions); + _sites.Clear(); + _assignmentPositions.Clear(); + + _aliasTarget = resolvedTarget; + } + + public void NewEpoch(int positionStart) + { + Resolve()._assignmentPositions.Add(positionStart); + } + + public void AddEnumeration(int positionStart, Location location, string symbolName) + { + Resolve()._sites.Add(new Site(positionStart, location, symbolName)); + } + + public void ReportRepeatedSites(OperationBlockAnalysisContext context, DiagnosticDescriptor rule) + { + if (_sites.Count < 2) + { + return; + } + + _sites.Sort(static (a, b) => a.Position.CompareTo(b.Position)); + _assignmentPositions.Sort(); + + // Group sites by epoch (= run between consecutive assignment positions). + var epochStart = 0; + while (epochStart < _sites.Count) + { + var epochEnd = FindEpochEnd(epochStart); + if (epochEnd - epochStart >= 2) + { + // Report every site in this epoch after the first one. + for (var i = epochStart + 1; i < epochEnd; i++) + { + var site = _sites[i]; + context.ReportDiagnostic(Diagnostic.Create(rule, site.Location, site.SymbolName)); + } + } + + epochStart = epochEnd; + } + } + + private int FindEpochEnd(int epochStart) + { + // The epoch containing _sites[epochStart] runs up to (but not including) the first site whose + // position is on the other side of an assignment with respect to _sites[epochStart]. + var startPos = _sites[epochStart].Position; + var firstBoundary = -1; + foreach (var assignment in _assignmentPositions) + { + if (assignment > startPos) + { + firstBoundary = assignment; + break; + } + } + + if (firstBoundary < 0) + { + return _sites.Count; + } + + for (var i = epochStart + 1; i < _sites.Count; i++) + { + if (_sites[i].Position >= firstBoundary) + { + return i; + } + } + + return _sites.Count; + } + + private readonly struct Site + { + public int Position { get; } + public Location Location { get; } + public string SymbolName { get; } + + public Site(int position, Location location, string symbolName) + { + Position = position; + Location = location; + SymbolName = symbolName; + } + } + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs b/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs index 7ff0c4d..02e42ca 100644 --- a/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs +++ b/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs @@ -312,6 +312,17 @@ internal static class DiagnosticDescriptors description: "Argument validation in async methods does not throw exceptions eagerly. The exception is thrown when the task is awaited, which can lead to unexpected behavior.", helpLinkUri: GetHelpUri(nameof(EPC37))); + /// + public static readonly DiagnosticDescriptor EPC38 = new DiagnosticDescriptor( + nameof(EPC38), + title: "Do not re-enumerate IEnumerable", + messageFormat: "Possible re-enumeration of IEnumerable '{0}'. Each enumeration starts a new set of tasks, which is almost certainly a bug.", + category: AsyncCategory, + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: true, + description: "Enumerating an IEnumerable more than once (for example, via Task.WhenAll followed by a foreach over the same enumerable) re-executes the underlying task producer and creates a new, unrelated set of tasks. Materialize with .ToArray() or .ToList() before observing.", + helpLinkUri: GetHelpUri(nameof(EPC38))); + public static string GetHelpUri(string ruleId) { return $"https://github.com/SergeyTeplyakov/ErrorProne.NET/tree/master/docs/Rules/{ruleId}.md"; diff --git a/src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs b/src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs new file mode 100644 index 0000000..3602e0d --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs @@ -0,0 +1,376 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using ErrorProne.NET.Core; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.CoreAnalyzers +{ + /// + /// Shared helpers for analyzers that reason about how an + /// is consumed (EPC38, EPC39). + /// + internal static class EnumerationMethods + { + /// + /// Names of methods that fully materialize / iterate the source + /// (i.e. they call GetEnumerator() at least once when invoked). + /// + private static readonly ImmutableHashSet EnumeratingLinqMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "Aggregate", + "All", + "Any", + "Average", + "Contains", + "Count", + "ElementAt", + "ElementAtOrDefault", + "First", + "FirstOrDefault", + "Last", + "LastOrDefault", + "LongCount", + "Max", + "MaxBy", + "Min", + "MinBy", + "SequenceEqual", + "Single", + "SingleOrDefault", + "Sum", + "ToArray", + "ToDictionary", + "ToFrozenDictionary", + "ToFrozenSet", + "ToHashSet", + "ToList", + "ToLookup"); + + /// + /// Names of methods that produce a *new*, materialized collection + /// from the source. + /// + private static readonly ImmutableHashSet MaterializingLinqMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "ToArray", + "ToDictionary", + "ToFrozenDictionary", + "ToFrozenSet", + "ToHashSet", + "ToList", + "ToLookup"); + + /// + /// Names of methods that return a deferred enumerable + /// (LINQ chain methods). They do not enumerate their source. + /// + private static readonly ImmutableHashSet ChainLinqMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "Append", + "AsEnumerable", + "Cast", + "Concat", + "DefaultIfEmpty", + "Distinct", + "DistinctBy", + "Except", + "ExceptBy", + "GroupBy", + "GroupJoin", + "Intersect", + "IntersectBy", + "Join", + "OfType", + "OrderBy", + "OrderByDescending", + "Prepend", + "Reverse", + "Select", + "SelectMany", + "Skip", + "SkipLast", + "SkipWhile", + "Take", + "TakeLast", + "TakeWhile", + "ThenBy", + "ThenByDescending", + "Union", + "UnionBy", + "Where", + "Zip"); + + /// + /// Names of methods on that take an + /// of tasks and observe every element. + /// + private static readonly ImmutableHashSet TaskAggregationMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "WhenAll", + "WhenAny", + "WaitAll", + "WaitAny"); + + /// + /// Names of static methods that *produce* a new deferred + /// from non-enumerable inputs (so every consumer pass starts a fresh + /// iteration). + /// + private static readonly ImmutableHashSet EnumerableProducerMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "Empty", + "Range", + "Repeat"); + + /// + /// Returns if invoking consumes its source enumerable + /// at least once (e.g. Count(), ToList(), Any(), …). + /// + public static bool IsEnumeratingLinqMethod(IMethodSymbol method, Compilation compilation) + { + if (method.MethodKind != MethodKind.Ordinary && method.MethodKind != MethodKind.ReducedExtension) + { + return false; + } + + if (!EnumeratingLinqMethods.Contains(method.Name)) + { + return false; + } + + var containing = (method.ReducedFrom ?? method).ContainingType; + return containing.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if is a LINQ chain method + /// (Where, Select, OrderBy, …) that produces a new deferred enumerable. + /// + public static bool IsLinqChainMethod(IMethodSymbol method, Compilation compilation) + { + if (!ChainLinqMethods.Contains(method.Name)) + { + return false; + } + + var containing = (method.ReducedFrom ?? method).ContainingType; + return containing.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if invoking produces a freshly + /// materialized collection from the source (the source is consumed but the *result* is no longer deferred). + /// + public static bool IsMaterializingLinqMethod(IMethodSymbol method, Compilation compilation) + { + if (!MaterializingLinqMethods.Contains(method.Name)) + { + return false; + } + + var containing = (method.ReducedFrom ?? method).ContainingType; + return containing.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if is Task.WhenAll, + /// Task.WhenAny, Task.WaitAll, or Task.WaitAny. + /// + public static bool IsTaskAggregationMethod(IMethodSymbol method, Compilation compilation) + { + if (!method.IsStatic || !TaskAggregationMethods.Contains(method.Name)) + { + return false; + } + + return method.ContainingType.IsClrType(compilation, typeof(System.Threading.Tasks.Task)); + } + + /// + /// Returns if is one of + /// Enumerable.Range, Enumerable.Repeat, or Enumerable.Empty — i.e. + /// a static factory that produces a fresh deferred enumerable each time it is invoked. + /// + public static bool IsEnumerableProducerMethod(IMethodSymbol method, Compilation compilation) + { + if (!method.IsStatic || !EnumerableProducerMethods.Contains(method.Name)) + { + return false; + } + + return method.ContainingType.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if is the deferred + /// or non-generic interface itself. Concrete materialized + /// collections (T[], List<T>, IReadOnlyList<T>, HashSet<T>, …) + /// return . + /// + public static bool IsDeferredEnumerableType(ITypeSymbol? type) + { + if (type is null) + { + return false; + } + + // SpecialType lives on the original (unconstructed) definition. For a constructed type like + // IEnumerable>, type.SpecialType is None — we must inspect OriginalDefinition. + switch (type.OriginalDefinition.SpecialType) + { + case SpecialType.System_Collections_Generic_IEnumerable_T: + case SpecialType.System_Collections_IEnumerable: + return true; + } + + // ParallelQuery, OrderedEnumerable, IGrouping, IOrderedEnumerable, etc. are + // intentionally NOT included to keep the rule precise. The most common deferred shape from a LINQ + // chain has the static type IEnumerable, which is handled above. + return false; + } + + /// + /// Returns if is + /// of , Task<T>, ValueTask, or + /// ValueTask<T>. + /// + public static bool IsTaskEnumerableType(ITypeSymbol? type, Compilation compilation) + { + if (type is not INamedTypeSymbol named) + { + return false; + } + + if (!IsDeferredEnumerableType(named)) + { + return false; + } + + if (named.TypeArguments.Length != 1) + { + return false; + } + + return named.TypeArguments[0].IsTaskLike(compilation); + } + + /// + /// Returns if is some materialized + /// T[]/List<T>/IReadOnlyList<T>/IList<T>/ + /// ICollection<T>/IReadOnlyCollection<T>/ImmutableArray<T>/ + /// HashSet<T> — i.e. cases where re-enumeration is safe. + /// + public static bool IsMaterializedCollectionType(ITypeSymbol? type) + { + if (type is null) + { + return false; + } + + if (type.TypeKind == TypeKind.Array) + { + return true; + } + + // Match by metadata name to avoid one-by-one compilation lookups (and to handle missing references). + return type.OriginalDefinition.SpecialType switch + { + SpecialType.System_Collections_Generic_ICollection_T => true, + SpecialType.System_Collections_Generic_IList_T => true, + SpecialType.System_Collections_Generic_IReadOnlyCollection_T => true, + SpecialType.System_Collections_Generic_IReadOnlyList_T => true, + _ => IsMaterializedByMetadataName(type), + }; + } + + private static bool IsMaterializedByMetadataName(ITypeSymbol type) + { + var name = type.OriginalDefinition.ToDisplayString(); + return name switch + { + "System.Collections.Generic.List" => true, + "System.Collections.Generic.HashSet" => true, + "System.Collections.Generic.Dictionary" => true, + "System.Collections.Generic.SortedSet" => true, + "System.Collections.Generic.SortedList" => true, + "System.Collections.Generic.SortedDictionary" => true, + "System.Collections.Generic.Queue" => true, + "System.Collections.Generic.Stack" => true, + "System.Collections.Immutable.ImmutableArray" => true, + "System.Collections.Immutable.ImmutableList" => true, + "System.Collections.Immutable.ImmutableHashSet" => true, + "System.Collections.Immutable.ImmutableSortedSet" => true, + "System.Collections.Immutable.ImmutableDictionary" => true, + "System.Collections.Frozen.FrozenSet" => true, + "System.Collections.Frozen.FrozenDictionary" => true, + "System.Collections.Concurrent.ConcurrentBag" => true, + _ => false, + }; + } + + /// + /// Walks back through LINQ chain calls (x.Where(...).Select(...).OrderBy(...)) and casts/conversions + /// to find the underlying local/parameter that the chain ultimately reads. + /// + /// Returns the root if it is a or + /// , otherwise . + /// + public static ISymbol? TryGetRootEnumerableSymbol(IOperation? operation, Compilation compilation) + { + while (operation is not null) + { + switch (operation) + { + case IConversionOperation conv: + operation = conv.Operand; + continue; + + case IParenthesizedOperation paren: + operation = paren.Operand; + continue; + + case ILocalReferenceOperation local: + return local.Local; + + case IParameterReferenceOperation parameter: + return parameter.Parameter; + + case IInvocationOperation invocation: + { + // LINQ chain: keep walking back through `.Where(...).Select(...)` to the chain root. + if (IsLinqChainMethod(invocation.TargetMethod, compilation)) + { + // For an extension method, the source is the first argument; for the (rare) + // explicit Enumerable.Xxx(source, ...) form it is the same. + if (invocation.Arguments.Length > 0) + { + operation = invocation.Arguments[0].Value; + continue; + } + + operation = invocation.Instance; + continue; + } + + // Any other invocation is a wall; we cannot reason about it. + return null; + } + + default: + return null; + } + } + + return null; + } + } +} From 6af740442fefcf49374085be74db208434e4b17e Mon Sep 17 00:00:00 2001 From: Sergey Tepliakov Date: Wed, 20 May 2026 11:09:26 -0700 Subject: [PATCH 3/3] Add EPC39: QuadraticEnumerationAnalyzer Flags iterating the same enumerable inside a loop, turning O(N) walks into O(N*M). Suggests materializing the source with .ToList()/.ToArray() before the loop. --- docs/Rules/EPC39.md | 98 ++++++ .../CoreAnalyzers/QuadraticEnumeration.cs | 63 ++++ .../QuadraticEnumerationAnalyzerTests.cs | 301 ++++++++++++++++++ .../AnalyzerReleases.Unshipped.md | 1 + .../QuadraticEnumerationAnalyzer.cs | 230 +++++++++++++ .../DiagnosticDescriptors.cs | 11 + 6 files changed, 704 insertions(+) create mode 100644 docs/Rules/EPC39.md create mode 100644 samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs diff --git a/docs/Rules/EPC39.md b/docs/Rules/EPC39.md new file mode 100644 index 0000000..d2e90c6 --- /dev/null +++ b/docs/Rules/EPC39.md @@ -0,0 +1,98 @@ +# EPC39 - Same enumerable iterated more than once inside a loop + +Detects three patterns where the *same* `IEnumerable` is iterated more than once inside an outer loop, +producing O(N²) (or worse) behavior. + +## Description + +Iterating the same source twice inside a loop turns an O(N) walk into an O(N·M) one. When the inner pass +is a deferred LINQ chain, every outer iteration also re-runs every chained filter / projection. + +The analyzer recognizes three sub-patterns and reports each with a tailored message: + +### Q1 — Same-symbol nested enumeration + +```csharp +foreach (var x in coll) +{ + var n = coll.Count(); // ❌ EPC39 — Count() re-iterates the entire collection on every step +} +``` + +### Q2 — Deferred IEnumerable re-enumerated in a loop + +```csharp +IEnumerable q = source.Where(x => x > 0); +foreach (var i in outer) +{ + var c = q.Count(); // ❌ EPC39 — Where(...) re-runs every iteration +} +``` + +### Q4 — Nested foreach over the same source + +```csharp +foreach (var x in coll) +{ + foreach (var y in coll) { ... } // ❌ EPC39 — O(N²) +} +``` + +## How to fix + +### Q1 / Q2 — materialize once, or use a different shape + +```csharp +var arr = coll.ToArray(); // or .ToList() +foreach (var x in arr) +{ + var n = arr.Length; // O(1) +} +``` + +If the inner work is genuinely "count every element matching X", pre-compute it outside the loop or replace +the loop body with a single LINQ aggregation that performs one pass. + +### Q4 — symmetric problems usually benefit from a hash join + +```csharp +var lookup = new HashSet(coll); +foreach (var x in coll) +{ + if (lookup.Contains(-x)) { ... } +} +``` + +## When NOT to suppress + +If the data set is *known* to be small (say, ≤ 16 elements) and the loop is on a hot path with a tight +cache footprint, a linear scan can actually be faster than a HashSet lookup. In those cases prefer to +suppress at the call site with a comment explaining the choice: + +```csharp +#pragma warning disable EPC39 // List is bounded to <=8 elements; HashSet has higher constant factor. +``` + +## Patterns intentionally NOT flagged + +- Calling `List.Contains`, `Array.IndexOf`, `List.FindIndex`, etc. inside a loop on a collection + that is **not** the loop's source. The cost is O(N·M), but in real codebases these patterns are + frequently correct on small inputs and previously produced too much noise. Use + [CA1851](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) + if you want broader "any double-enumeration" coverage, or [EPC23](EPC23.md) for the specific + `HashSet.Contains`-via-LINQ misuse. +- `ImmutableArray.Contains` and `Span.IndexOf` — they don't allocate and are typically used on + tiny inputs. + +## Limitations (v2) + +- Analysis is scoped to a single method body. Helper calls that internally enumerate are not tracked. +- Q1/Q2 LINQ root resolution stops at the first non-chain call. A chain like + `GetItems().Where(...).Count()` is not flagged unless the root is a local/parameter (because resolving + the source through arbitrary method bodies is unsound). + +## See also + +- [CA1851: Possible multiple enumerations of `IEnumerable` collection](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) — the general-purpose rule. EPC39 is more aggressive in the *quadratic* dimension (it fires inside loops) but narrower than CA1851 in scope (it only fires when the source is the same enumerable). +- [EPC38](EPC38.md) — re-enumeration of `IEnumerable` (correctness bug). +- [EPC23](EPC23.md) — `Enumerable.Contains` on a `HashSet` (a related "wrong tool for the job" pattern). diff --git a/samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs b/samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs new file mode 100644 index 0000000..2915329 --- /dev/null +++ b/samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs @@ -0,0 +1,63 @@ +using System.Collections.Generic; +using System.Linq; + +namespace ErrorProne.Samples.CoreAnalyzers +{ + public class QuadraticEnumeration + { + // Q1 — LINQ-enumerating call whose source is the same enumerable being iterated. O(N^2). + public int SameSourceCount(IEnumerable coll) + { + var total = 0; + foreach (var x in coll) + { + var n = coll.Count(); // ❌ EPC39 (Q1) + total += x * n; + } + + return total; + } + + // Q2 — re-enumeration of a deferred IEnumerable inside a loop. Each iteration re-runs Where(). + public int DeferredInLoop(IEnumerable source, int[] outer) + { + IEnumerable q = source.Where(x => x > 0); + var total = 0; + foreach (var i in outer) + { + total += q.Count(); // ❌ EPC39 (Q2) + } + + return total; + } + + // Q4 — nested foreach over the same source. + public int NestedForeachSameSource(List coll) + { + var pairs = 0; + foreach (var x in coll) + { + foreach (var y in coll) // ❌ EPC39 (Q4) + { + if (x + y == 0) pairs++; + } + } + + return pairs; + } + + // ✅ Fixes: + + public int DeferredInLoopFixed(IEnumerable source, int[] outer) + { + var q = source.Where(x => x > 0).ToList(); // materialize once + var total = 0; + foreach (var i in outer) + { + total += q.Count; + } + + return total; + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs new file mode 100644 index 0000000..b8de13e --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs @@ -0,0 +1,301 @@ +using NUnit.Framework; +using System.Threading.Tasks; +using Verify = ErrorProne.NET.TestHelpers.CSharpCodeFixVerifier< + ErrorProne.NET.CoreAnalyzers.QuadraticEnumerationAnalyzer, + Microsoft.CodeAnalysis.Testing.EmptyCodeFixProvider>; + +namespace ErrorProne.NET.CoreAnalyzers.Tests.CoreAnalyzers +{ + [TestFixture] + public class QuadraticEnumerationAnalyzerTests + { + // -------- Q1: Same-symbol nested enumeration --------------------------------------------------- + + [Test] + public async Task Q1_Warns_OnEnumerableCount_WithSameSource() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable coll) { + foreach (var x in coll) { + var n = [|coll.Count()|]; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q1_Warns_OnAny_WithSameSource() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable coll) { + foreach (var x in coll) { + if ([|coll.Any(y => y > x)|]) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + // -------- Q2: Deferred re-enumeration in a loop ------------------------------------------------ + + [Test] + public async Task Q2_Warns_OnDeferredLocalInsideLoop() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + IEnumerable q = source.Where(x => x > 0); + foreach (var i in outer) { + var c = [|q.Count()|]; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenLocalIsList() + { + // Same shape as Q2 but the local is concretely typed as List — no deferred re-enum. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(int[] outer) { + List q = new List { 1, 2, 3 }; + foreach (var i in outer) { + var c = q.Count; + } + } +}"; + await Verify.VerifyAsync(code); + } + + // -------- Q4: Nested foreach over the same source ---------------------------------------------- + + [Test] + public async Task Q4_Warns_OnNestedForeachOverSameLocal() + { + var code = @" +using System.Collections.Generic; +class Test { + void Foo(List coll) { + foreach (var x in coll) { + foreach (var y in [|coll|]) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q4_NoWarn_OnNestedForeachOverDistinctSource() + { + var code = @" +using System.Collections.Generic; +class Test { + void Foo(List a, List b) { + foreach (var x in a) { + foreach (var y in b) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + // -------- Negative / boundary cases ------------------------------------------------------------ + + [Test] + public async Task NoWarn_OnSingleForeachNoInner() + { + var code = @" +using System.Collections.Generic; +class Test { + void Foo(IEnumerable coll) { + foreach (var x in coll) { } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnEnumerableCount_OutsideLoop() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable coll) { + var n = coll.Count(); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnForLoopUsingArrayLength() + { + // for (var i = 0; i < arr.Length; i++) — arr.Length is a property, not a method call. + var code = @" +class Test { + void Foo(int[] arr) { + for (var i = 0; i < arr.Length; i++) { + var v = arr[i]; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnListContainsInsideLoop_DifferentSource() + { + // After the v2 tightening EPC39 only flags re-iteration of the *same* enumerable. A generic + // "List.Contains called in a loop on a different collection" pattern is no longer reported, + // even though it is technically O(N·M) — too noisy in real codebases. + var code = @" +using System.Collections.Generic; +class Test { + void Foo(List haystack, IEnumerable needles) { + foreach (var n in needles) { + if (haystack.Contains(n)) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_Warns_InsideWhileLoop() + { + // EPC39 fires for any loop kind, not just foreach. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int n) { + IEnumerable q = source.Where(x => x > 0); + var i = 0; + while (i < n) { + var c = [|q.Count()|]; + i++; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_AfterMaterialization() + { + // Materialize once before the loop — re-enumeration is now safe. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + var q = source.Where(x => x > 0).ToList(); + foreach (var i in outer) { + var c = q.Count; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenDeferredLocalIsDeclaredInsideLoopBody() + { + // The deferred IEnumerable local is declared *inside* the loop body — a fresh enumerable is + // bound every iteration, so multiple enumerations within a single iteration do NOT compound + // with the outer loop size (it is a CA1851-style concern, not quadratic-in-the-loop). + var code = @" +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +class Test { + void Foo(IEnumerable types) { + foreach (var type in types) { + IEnumerable attrs = type.GetCustomAttributes(); + var a = attrs.Any(x => x.GetType() == typeof(ObsoleteAttribute)); + var b = attrs.Any(x => x.GetType() == typeof(FlagsAttribute)); + var c = attrs.Any(x => x.GetType() == typeof(SerializableAttribute)); + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenDeferredLocalIsDeclaredInsideLoopBody_SingleEnumeration() + { + // Even a single enumeration on a deferred local declared inside the loop should not fire: + // there is no outer-loop-multiplier to amortize away by hoisting. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + foreach (var i in outer) { + IEnumerable q = source.Where(x => x > i); + var c = q.Count(); + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenDeferredLocalIsDeclaredInsideNestedBlockOfLoopBody() + { + // A local declared inside an inner `if` block (still syntactically inside the loop body) is + // also fresh per iteration — must not warn. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + foreach (var i in outer) { + if (i > 0) { + IEnumerable q = source.Where(x => x > i); + var a = q.Any(); + var b = q.Count(); + } + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NestedLoops_OnlyInnerReports() + { + // Re-enumerating a deferred IEnumerable inside an inner loop should report ONCE (against the + // inner loop's body), not twice (once for the outer and once for the inner). + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, IEnumerable a, IEnumerable b) { + IEnumerable q = source.Where(x => x > 0); + foreach (var x in a) { + foreach (var y in b) { + var c = [|q.Count()|]; + } + } + } +}"; + await Verify.VerifyAsync(code); + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md b/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md index 6fb3c8a..3c0e0ab 100644 --- a/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md +++ b/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md @@ -30,6 +30,7 @@ EPC35 | Async | Info | DoNotBlockUnnecessarilyInAsyncMethodsAnalyzer EPC36 | Async | Info | DoNotUseAsyncDelegatesForLongRunningTasksAnalyzer EPC37 | Async | Info | DoNotValidateArgumentsInAsyncMethodsAnalyzer EPC38 | Async | Warning | TaskEnumerableReEnumerationAnalyzer +EPC39 | Performance | Warning | QuadraticEnumerationAnalyzer ERP021 | ErrorHandling | Warning | ThrowExAnalyzer ERP022 | ErrorHandling | Warning | SwallowAllExceptionsAnalyzer ERP031 | Concurrency | Warning | ConcurrentCollectionAnalyzer diff --git a/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs new file mode 100644 index 0000000..2d48f22 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs @@ -0,0 +1,230 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System.Collections.Generic; +using System.Collections.Immutable; +using ErrorProne.NET.Core; +using ErrorProne.NET.CoreAnalyzers; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.CoreAnalyzers +{ + /// + /// EPC39 — detects three patterns where the *same* enumerable is iterated more than once inside a loop, + /// producing O(N²) (or worse) behavior: + /// + /// Q1 — same-symbol nested enumeration: a LINQ-enumerating call whose source is the + /// same local/parameter that is also the foreach source. + /// Q2 — re-enumeration of a deferred + /// inside a loop (the receiver's static type is IEnumerable<T>). + /// Q4 — nested foreach over the same source. + /// + /// + /// Generic "O(N) method called inside a loop" patterns (e.g. List<T>.Contains / + /// Array.IndexOf on a different collection than the loop source) are intentionally NOT flagged: + /// they are often correct on small inputs and produced too much noise in real codebases. + /// + /// + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class QuadraticEnumerationAnalyzer : DiagnosticAnalyzerBase + { + /// + public static readonly DiagnosticDescriptor Rule = DiagnosticDescriptors.EPC39; + + /// + public override ImmutableArray SupportedDiagnostics => ImmutableArray.Create(Rule); + + /// + public QuadraticEnumerationAnalyzer() + : base(Rule) + { + } + + /// + protected override void InitializeCore(AnalysisContext context) + { + context.RegisterOperationAction(AnalyzeLoop, OperationKind.Loop); + } + + private static void AnalyzeLoop(OperationAnalysisContext context) + { + var loop = (ILoopOperation)context.Operation; + + // Resolve the loop's source symbol if it's a foreach over a local/parameter (drives Q1 and Q4). + ISymbol? loopSource = null; + if (loop is IForEachLoopOperation foreachLoop) + { + loopSource = EnumerationMethods.TryGetRootEnumerableSymbol(foreachLoop.Collection, context.Compilation); + } + + var reported = new HashSet(); + + foreach (var op in loop.Body.EnumerateChildOperations()) + { + if (IsInsideNestedFunction(op, loop.Body)) + { + continue; + } + + // Skip any operation that lives inside an inner loop — the inner loop will analyze + // its own body on its own context, and reporting at both levels causes noise. + if (IsInsideInnerLoop(op, loop)) + { + continue; + } + + switch (op) + { + case IForEachLoopOperation inner when loopSource is not null: + { + // Q4 — nested foreach over the same source as the outer foreach. + var innerSource = EnumerationMethods.TryGetRootEnumerableSymbol(inner.Collection, context.Compilation); + if (innerSource is not null && SymbolEqualityComparer.Default.Equals(innerSource, loopSource)) + { + Report(context, reported, inner.Collection.Syntax, + $"nested foreach iterates over the same source '{loopSource.Name}' twice (O(N²))."); + } + + break; + } + + case IInvocationOperation invocation: + AnalyzeInvocation(context, loop, loopSource, invocation, reported); + break; + } + } + } + + private static void AnalyzeInvocation( + OperationAnalysisContext context, + ILoopOperation loop, + ISymbol? loopSource, + IInvocationOperation invocation, + HashSet reported) + { + var compilation = context.Compilation; + var target = invocation.TargetMethod; + + // ------- Q1 & Q2: enumerating LINQ method called on some source ---------------------------- + if (EnumerationMethods.IsEnumeratingLinqMethod(target, compilation)) + { + var sourceOp = invocation.Instance + ?? (invocation.Arguments.Length > 0 ? invocation.Arguments[0].Value : null); + if (sourceOp is null) + { + return; + } + + var rootSymbol = EnumerationMethods.TryGetRootEnumerableSymbol(sourceOp, compilation); + if (rootSymbol is null) + { + return; + } + + // Q1: same symbol as the outer foreach's collection. + if (loopSource is not null && SymbolEqualityComparer.Default.Equals(rootSymbol, loopSource)) + { + Report(context, reported, invocation.Syntax, + $"LINQ method '{target.Name}' is called on the same enumerable '{rootSymbol.Name}' that is being iterated (O(N²))."); + return; + } + + // Q2: source is a deferred IEnumerable local/parameter. + var symbolType = GetSymbolType(rootSymbol); + if (EnumerationMethods.IsDeferredEnumerableType(symbolType)) + { + // If the source local is declared inside the loop body, a fresh enumerable is bound + // each iteration — the cost does NOT compound with the outer loop size. Multiple + // enumerations within a single iteration are a constant-factor multiple-enumeration + // concern (see CA1851), not the quadratic-in-the-loop pattern EPC39 targets. Skip. + if (IsLocalDeclaredInsideLoopBody(rootSymbol, loop)) + { + return; + } + + Report(context, reported, invocation.Syntax, + $"LINQ method '{target.Name}' re-enumerates deferred IEnumerable '{rootSymbol.Name}' on every iteration; materialize it with .ToList()/.ToArray() before the loop."); + return; + } + } + } + + private static bool IsLocalDeclaredInsideLoopBody(ISymbol symbol, ILoopOperation loop) + { + if (symbol is not ILocalSymbol local) + { + return false; + } + + var bodySyntax = loop.Body.Syntax; + var bodyTree = bodySyntax.SyntaxTree; + var bodySpan = bodySyntax.Span; + + foreach (var reference in local.DeclaringSyntaxReferences) + { + if (reference.SyntaxTree == bodyTree && bodySpan.Contains(reference.Span)) + { + return true; + } + } + + return false; + } + + private static void Report( + OperationAnalysisContext context, + HashSet reported, + SyntaxNode syntax, + string message) + { + if (!reported.Add(syntax)) + { + return; + } + + context.ReportDiagnostic(Diagnostic.Create(Rule, syntax.GetLocation(), message)); + } + + private static bool IsInsideNestedFunction(IOperation op, IOperation root) + { + for (var current = op.Parent; current is not null && current != root; current = current.Parent) + { + if (current is IAnonymousFunctionOperation || current is ILocalFunctionOperation) + { + return true; + } + } + + return false; + } + + private static bool IsInsideInnerLoop(IOperation op, ILoopOperation outerLoop) + { + // Walk up from `op` looking for a loop operation that is strictly inside `outerLoop`. + for (var current = op.Parent; current is not null && current != outerLoop; current = current.Parent) + { + if (current is ILoopOperation && current != outerLoop) + { + return true; + } + } + + return false; + } + + private static ITypeSymbol? GetSymbolType(ISymbol symbol) + { + return symbol switch + { + ILocalSymbol l => l.Type, + IParameterSymbol p => p.Type, + _ => null, + }; + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs b/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs index 02e42ca..8b333f0 100644 --- a/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs +++ b/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs @@ -323,6 +323,17 @@ internal static class DiagnosticDescriptors description: "Enumerating an IEnumerable more than once (for example, via Task.WhenAll followed by a foreach over the same enumerable) re-executes the underlying task producer and creates a new, unrelated set of tasks. Materialize with .ToArray() or .ToList() before observing.", helpLinkUri: GetHelpUri(nameof(EPC38))); + /// + public static readonly DiagnosticDescriptor EPC39 = new DiagnosticDescriptor( + nameof(EPC39), + title: "Same enumerable iterated more than once inside a loop", + messageFormat: "Quadratic enumeration: {0}", + category: PerformanceCategory, + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: true, + description: "Iterating the same enumerable more than once inside a loop turns an O(N) walk into an O(N*M) one. Materialize the source with .ToList()/.ToArray() before the loop, or restructure to enumerate once.", + helpLinkUri: GetHelpUri(nameof(EPC39))); + public static string GetHelpUri(string ruleId) { return $"https://github.com/SergeyTeplyakov/ErrorProne.NET/tree/master/docs/Rules/{ruleId}.md";