From 9afb7e124e1af4b1a059c9ac989f4125b5e42371 Mon Sep 17 00:00:00 2001 From: Sergey Tepliakov Date: Wed, 20 May 2026 13:20:14 -0700 Subject: [PATCH] Add EPC38, EPC39, EPC40, EPC41 analyzers Introduces four new analyzers: - EPC38 TaskEnumerableReEnumerationAnalyzer (Async): flags re-enumeration of IEnumerable, which silently restarts the task producer. Disabled by default. - EPC39 QuadraticEnumerationAnalyzer (Performance): flags iterating the same enumerable inside a loop (O(N*M) walks). Disabled by default. - EPC40 PrivateMethodMultipleEnumerationAnalyzer (Performance): flags private methods that enumerate an IEnumerable parameter multiple times when a caller passes a deferred LINQ query. Disabled by default. - EPC41 FormatMethodArgumentsAnalyzer (ErrorHandling): validates format-string placeholders against arguments for user-annotated formatting methods (configured via 'dotnet_diagnostic.EPC41.format_methods' in .editorconfig). Enabled by default as Warning. Includes shared EnumerationMethods helper used by EPC38/39/40. --- docs/Rules/EPC38.md | 104 ++++ docs/Rules/EPC39.md | 98 ++++ docs/Rules/EPC40.md | 147 ++++++ docs/Rules/EPC41.md | 115 +++++ .../AsyncStuff/TaskReEnumeration.cs | 58 +++ .../PrivateMultipleEnumeration.cs | 71 +++ .../CoreAnalyzers/QuadraticEnumeration.cs | 63 +++ ...askEnumerableReEnumerationAnalyzerTests.cs | 283 +++++++++++ .../FormatMethodArgumentsAnalyzerTests.cs | 337 +++++++++++++ ...eMethodMultipleEnumerationAnalyzerTests.cs | 355 +++++++++++++ .../QuadraticEnumerationAnalyzerTests.cs | 301 +++++++++++ .../AnalyzerReleases.Unshipped.md | 4 + .../TaskEnumerableReEnumerationAnalyzer.cs | 411 +++++++++++++++ .../FormatMethodArgumentsAnalyzer.cs | 441 ++++++++++++++++ ...rivateMethodMultipleEnumerationAnalyzer.cs | 471 ++++++++++++++++++ .../QuadraticEnumerationAnalyzer.cs | 230 +++++++++ .../DiagnosticDescriptors.cs | 44 ++ .../EnumerationMethods.cs | 376 ++++++++++++++ 18 files changed, 3909 insertions(+) create mode 100644 docs/Rules/EPC38.md create mode 100644 docs/Rules/EPC39.md create mode 100644 docs/Rules/EPC40.md create mode 100644 docs/Rules/EPC41.md create mode 100644 samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs create mode 100644 samples/ErrorProne.Samples/CoreAnalyzers/PrivateMultipleEnumeration.cs create mode 100644 samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/FormatMethodArgumentsAnalyzerTests.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzerTests.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/FormatMethodArgumentsAnalyzer.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzer.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs create mode 100644 src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs diff --git a/docs/Rules/EPC38.md b/docs/Rules/EPC38.md new file mode 100644 index 0000000..b55bd25 --- /dev/null +++ b/docs/Rules/EPC38.md @@ -0,0 +1,104 @@ +# EPC38 - Do not re-enumerate `IEnumerable` + +Detects when a deferred `IEnumerable` / `IEnumerable>` is observed (enumerated) more than once +inside the same method body. + +## Description + +The standard way to "fan out" async work in .NET is to build a sequence of tasks from some input and then +wait for all of them: + +```csharp +var tasks = ids.Select(async id => await DoWorkAsync(id)); +await Task.WhenAll(tasks); +``` + +This *looks* like it produces a stable collection, but `Select` returns a **deferred** `IEnumerable>`: +every enumeration restarts the projection and creates a fresh set of tasks. So this seemingly innocent code: + +```csharp +var tasks = ids.Select(async id => await DoWorkAsync(id)); +await Task.WhenAll(tasks); +foreach (var t in tasks) // ❌ EPC38 +{ + Process(await t); +} +``` + +…does **not** process the results of the tasks we awaited. It starts a brand-new set of `DoWorkAsync` calls, +awaits each one again, and processes those. The work is duplicated, side effects fire twice, and timing- +sensitive behavior breaks in confusing ways. + +Materializing the sequence up front fixes the bug: + +```csharp +var tasks = ids.Select(async id => await DoWorkAsync(id)).ToArray(); +await Task.WhenAll(tasks); +foreach (var t in tasks) // ✅ tasks is Task[] — safe to re-enumerate +{ + Process(await t); +} +``` + +The analyzer flags **any** symbol whose static type is `IEnumerable`, `IEnumerable>`, +`IEnumerable`, or `IEnumerable>` that is enumerated more than once inside the same +method body. Both observations matter — the rule does not require the first one to be `Task.WhenAll`. + +## Code that triggers the analyzer + +```csharp +async Task FooAsync(IEnumerable ids) +{ + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + foreach (var t in tasks) { await t; } // ❌ EPC38 +} + +void Counts(IEnumerable> tasks) +{ + var count = tasks.Count(); + var sum = tasks.Sum(t => t.Result); // ❌ EPC38 +} + +async Task ManualAsync(IEnumerable tasks) +{ + foreach (var t in tasks) { await t; } + foreach (var t in tasks) { _ = t.Status; } // ❌ EPC38 (strict superset case) +} +``` + +## How to fix + +Materialize the sequence once and use the concrete collection from then on: + +```csharp +async Task FooAsync(IEnumerable ids) +{ + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }).ToArray(); + await Task.WhenAll(tasks); + foreach (var t in tasks) { await t; } // ✅ Task[] — safe +} +``` + +`ToList()`, `ToArray()`, and similar materializing operators all work. If the source is a parameter and you +control the API, prefer to take `IReadOnlyList>` or `Task[]` instead of `IEnumerable>`, +which both communicates intent and makes the rule fall silent. + +## When NOT to suppress + +In almost every case this is a real bug. Suppressing it usually means you've convinced yourself the producer +is *not* deferred — in which case the right fix is to change the static type of the variable to the +materialized type (`IReadOnlyList>`, `Task[]`, etc.) so future readers don't have to make the +same leap. + +## Limitations (v1) + +- Analysis is scoped to a single method body. The rule does not track tasks passed across method boundaries. +- Field and property accesses are not tracked. +- Lambdas / local functions are analyzed as independent scopes. +- Re-assignment (`tasks = freshSource;`) resets tracking from that point onward. + +## See also + +- [CA1851: Possible multiple enumerations of `IEnumerable` collection](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) — the general-purpose rule. EPC38 specifically targets the task-producing case where re-enumeration is almost always a bug, not just a performance issue. +- [EPC39](EPC39.md) — quadratic enumeration inside a loop. diff --git a/docs/Rules/EPC39.md b/docs/Rules/EPC39.md new file mode 100644 index 0000000..d2e90c6 --- /dev/null +++ b/docs/Rules/EPC39.md @@ -0,0 +1,98 @@ +# EPC39 - Same enumerable iterated more than once inside a loop + +Detects three patterns where the *same* `IEnumerable` is iterated more than once inside an outer loop, +producing O(N²) (or worse) behavior. + +## Description + +Iterating the same source twice inside a loop turns an O(N) walk into an O(N·M) one. When the inner pass +is a deferred LINQ chain, every outer iteration also re-runs every chained filter / projection. + +The analyzer recognizes three sub-patterns and reports each with a tailored message: + +### Q1 — Same-symbol nested enumeration + +```csharp +foreach (var x in coll) +{ + var n = coll.Count(); // ❌ EPC39 — Count() re-iterates the entire collection on every step +} +``` + +### Q2 — Deferred IEnumerable re-enumerated in a loop + +```csharp +IEnumerable q = source.Where(x => x > 0); +foreach (var i in outer) +{ + var c = q.Count(); // ❌ EPC39 — Where(...) re-runs every iteration +} +``` + +### Q4 — Nested foreach over the same source + +```csharp +foreach (var x in coll) +{ + foreach (var y in coll) { ... } // ❌ EPC39 — O(N²) +} +``` + +## How to fix + +### Q1 / Q2 — materialize once, or use a different shape + +```csharp +var arr = coll.ToArray(); // or .ToList() +foreach (var x in arr) +{ + var n = arr.Length; // O(1) +} +``` + +If the inner work is genuinely "count every element matching X", pre-compute it outside the loop or replace +the loop body with a single LINQ aggregation that performs one pass. + +### Q4 — symmetric problems usually benefit from a hash join + +```csharp +var lookup = new HashSet(coll); +foreach (var x in coll) +{ + if (lookup.Contains(-x)) { ... } +} +``` + +## When NOT to suppress + +If the data set is *known* to be small (say, ≤ 16 elements) and the loop is on a hot path with a tight +cache footprint, a linear scan can actually be faster than a HashSet lookup. In those cases prefer to +suppress at the call site with a comment explaining the choice: + +```csharp +#pragma warning disable EPC39 // List is bounded to <=8 elements; HashSet has higher constant factor. +``` + +## Patterns intentionally NOT flagged + +- Calling `List.Contains`, `Array.IndexOf`, `List.FindIndex`, etc. inside a loop on a collection + that is **not** the loop's source. The cost is O(N·M), but in real codebases these patterns are + frequently correct on small inputs and previously produced too much noise. Use + [CA1851](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) + if you want broader "any double-enumeration" coverage, or [EPC23](EPC23.md) for the specific + `HashSet.Contains`-via-LINQ misuse. +- `ImmutableArray.Contains` and `Span.IndexOf` — they don't allocate and are typically used on + tiny inputs. + +## Limitations (v2) + +- Analysis is scoped to a single method body. Helper calls that internally enumerate are not tracked. +- Q1/Q2 LINQ root resolution stops at the first non-chain call. A chain like + `GetItems().Where(...).Count()` is not flagged unless the root is a local/parameter (because resolving + the source through arbitrary method bodies is unsound). + +## See also + +- [CA1851: Possible multiple enumerations of `IEnumerable` collection](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) — the general-purpose rule. EPC39 is more aggressive in the *quadratic* dimension (it fires inside loops) but narrower than CA1851 in scope (it only fires when the source is the same enumerable). +- [EPC38](EPC38.md) — re-enumeration of `IEnumerable` (correctness bug). +- [EPC23](EPC23.md) — `Enumerable.Contains` on a `HashSet` (a related "wrong tool for the job" pattern). diff --git a/docs/Rules/EPC40.md b/docs/Rules/EPC40.md new file mode 100644 index 0000000..82b3c95 --- /dev/null +++ b/docs/Rules/EPC40.md @@ -0,0 +1,147 @@ +# EPC40 - Multiple enumeration of deferred query passed to private method + +Detects a cross-method variant of the "multiple enumeration" bug that CA1851 cannot diagnose: +a private method enumerates its `IEnumerable` parameter more than once **and** a caller passes a +deferred LINQ query (or iterator method, or `Enumerable.Range/Repeat/Empty`). Every enumeration +re-runs the entire query. + +The rule is restricted to `private` methods because callers form a *closed world* within the +compilation — we can enumerate them all and decide statically whether any of them passes a deferred +source. For public/internal methods this would be undecidable. + +## When the rule fires + +A diagnostic is produced at the **call site** when **all** of these conditions hold: + +1. The target method is `private`. +2. The target method takes one or more `IEnumerable` (or non-generic `IEnumerable`) parameters. +3. The target method body enumerates that parameter **≥ 2 times** (counting `foreach`, LINQ + enumerating methods, and `Task.WhenAll(items)`/`WaitAll`/etc.). +4. The total number of call sites for the method is **≤ 3** (configurable cap; above the cap the + rule conservatively gives up). +5. The argument expression at *this* call site is a **deferred query**, determined by walking up to + **3 hops** through local declarations in the caller's body. A "deferred query" is one of: + - A LINQ chain method call (`Where`, `Select`, `OrderBy`, `SelectMany`, `Concat`, `Take`, ...). + - `Enumerable.Range`, `Enumerable.Repeat`, or `Enumerable.Empty`. + - An invocation of an **iterator method** (a method with `yield return` / `yield break` in its + body). + +## Examples + +### ❌ Bad + +```csharp +private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); // <-- second enumeration +} + +void Caller(IEnumerable src) { + Helper(src.Where(x => x > 0)); // ❌ EPC40 — deferred query, Helper iterates twice +} +``` + +### ❌ Bad — iterator producer + +```csharp +static IEnumerable Produce() { + yield return 1; + yield return 2; +} + +void Caller() { + Helper(Produce()); // ❌ EPC40 — iterator method +} +``` + +### ❌ Bad — 2-hop provenance + +```csharp +void Caller(IEnumerable src) { + var step1 = src.Where(x => x > 0); + var step2 = step1; + Helper(step2); // ❌ EPC40 — walked 2 hops back to .Where(...) +} +``` + +### ✅ Good — materialized argument + +```csharp +void Caller(IEnumerable src) { + Helper(src.Where(x => x > 0).ToList()); // breaks the deferred chain +} +``` + +### ✅ Good — change the parameter type + +```csharp +private int Helper(IReadOnlyList items) { // signals intent + lets you index/Count in O(1) + var c = items.Count; + var sum = 0; + foreach (var x in items) sum += x; + return c + sum; +} +``` + +### ✅ Not flagged — list/array caller + +```csharp +void Caller(List src) { + Helper(src); // List is materialized; re-enumeration is cheap +} +``` + +### ✅ Not flagged — caller forwards its own parameter + +```csharp +void Caller(IEnumerable src) { + Helper(src); // We can't tell whether 'src' itself is a query without inter-procedural analysis +} +``` + +This is intentional — chasing provenance further would require inter-procedural dataflow, which +explodes quickly. If the *immediate* caller is itself private, see the **transitivity** note below. + +## How to fix + +Pick the right tool for the situation: + +| Situation | Fix | +| --- | --- | +| Helper genuinely needs Count + foreach | Change the parameter type to `IReadOnlyList` / `IReadOnlyCollection` / `T[]` | +| Helper genuinely needs to enumerate twice | Call `items = items.ToList()` at the top of the helper, or materialize at the caller | +| Helper can be rewritten as one pass | Restructure to a single enumeration (often the best fix) | + +Materializing at the **caller** preserves the helper's signature and makes the cost explicit to readers: + +```csharp +Helper(src.Where(x => x > 0).ToList()); +``` + +Materializing at the **callee** localizes the fix: + +```csharp +private int Helper(IEnumerable items) { + var list = items.ToList(); + return list.Count + list.Sum(); +} +``` + +## Limitations (v1) + +- **Scope is `private` only.** `internal` methods are excluded for now because consumers under + `InternalsVisibleTo` can break the closed-world assumption. May be expanded in v1.1 after dogfooding. +- **Caller cap = 3.** If a private method has more than 3 call sites, the rule does not fire. Such + methods are effectively public within the assembly and the signal-to-noise drops. +- **Provenance hops = 3.** Chains longer than 3 local declarations are not followed. +- **No transitivity.** If `A` calls private `B` calls private `C`, and `C` enumerates twice and the + call from `B` to `C` forwards a deferred parameter of `B`, this rule does *not* propagate the + warning back to `A`. Transitive analysis is plausible but out of scope for v1. +- **No type-flow.** A `Func>` returning a query, invoked at the call site, is not + resolved through the delegate. + +## See also + +- [CA1851: Possible multiple enumerations of `IEnumerable` collection](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca1851) — same hazard, single-method scope only. +- [EPC38](EPC38.md) — `IEnumerable` re-enumeration (correctness bug variant). +- [EPC39](EPC39.md) — quadratic enumeration inside a loop (performance bug variant). diff --git a/docs/Rules/EPC41.md b/docs/Rules/EPC41.md new file mode 100644 index 0000000..f3a6691 --- /dev/null +++ b/docs/Rules/EPC41.md @@ -0,0 +1,115 @@ +# EPC41 - Format string does not match arguments (user-annotated formatting methods) + +Reports calls to user-annotated formatting methods where the format string references argument +indices that are not actually supplied. Catches the same class of bug as +[CA2241](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2241) +(which would throw `FormatException` at runtime) but for **third-party** methods you do not own and +cannot mark with attributes. + +## Description + +`String.Format`-style methods take a format string with placeholders like `{0}`, `{1,10:N2}`. When +a placeholder index is greater than or equal to the number of supplied arguments, the runtime throws +`FormatException`. CA2241 catches this for a fixed list of BCL methods. EPC41 lets you extend the +analysis to your own / third-party libraries by listing them in `.editorconfig`. + +EPC41 ships **no** built-in method list — it does nothing until you annotate at least one method. +Use CA2241 for BCL methods; use EPC41 for everything else. + +## Configuration + +Add the methods to analyze under `dotnet_diagnostic.EPC41.format_methods` in `.editorconfig`: + +```ini +[*.cs] +dotnet_diagnostic.EPC41.format_methods = + MyCorp.Logging.Logger.LogInfo:0; + MyCorp.Logging.Logger.LogError:1; + MyCorp.Logging.Logger.Log*:0; + MyCorp.Logging.Logger.Trace:format; + SomeLib.Tracer.Trace:0 +``` + +Each entry has the form `FullyQualifiedTypeName.MethodName[*]:(index|paramName)`: + +- `FullyQualifiedTypeName` — namespace-qualified containing-type name (no `global::` prefix). For + generic types, use the unbound name (e.g. `MyCorp.Cache`, not `MyCorp.Cache`). +- `MethodName` — the method name. Matches **all overloads** of that name on that type. A trailing + `*` makes it a prefix match (e.g. `Log*` matches `Log`, `LogInfo`, `LogError`, ...). Exact + matches take precedence over wildcards. +- `index` — 0-based index of the parameter holding the format string, **or** +- `paramName` — the name of the parameter holding the format string (resolved per overload, so + this works even when the format parameter has different ordinals across overloads). + +Entries can be separated by `;`, `,`, or newlines. Whitespace is ignored. + +### Example signatures and corresponding spec + +```csharp +public static void Log(string format, params object[] args); // → :0 or :format +public static void Log(IFormatProvider p, string format, params object[] args); // → :1 or :format +public static void Trace(int level, string template, object a, object b); // → :1 or :template +``` + +## What is reported + +For each matching call where the **format string is a compile-time constant**: + +1. **Placeholder index out of range** — e.g. format `"{0} {1}"` with only one trailing argument. +2. **Malformed format string** — unbalanced `{` or `}`, non-numeric placeholder body, missing `}`. + +## What is NOT reported + +- Calls where the format string is not a compile-time constant (e.g. `Log(fmt, x)` where `fmt` is a + parameter or field). +- Calls where the `params` argument is an opaque `object[]` variable (we cannot count its elements + statically). Explicit `new object[] { ... }` literals **are** counted. +- "Too many arguments" — unused trailing arguments are not flagged (intentional: matches CA2241). +- Methods not listed in `format_methods`. +- Structured-logging templates (`{UserName}`-style named placeholders, à la Serilog or + `Microsoft.Extensions.Logging`). EPC41 only understands numeric `{N}` placeholders today. + +## Examples + +Given this configuration: + +```ini +dotnet_diagnostic.EPC41.format_methods = MyCorp.Logging.Logger.Log:0 +``` + +### ❌ Reported + +```csharp +Logger.Log("{0} {1}", 1); // {1} has no argument +Logger.Log("value = {0}"); // no args at all +Logger.Log("unterminated {0", 1); // malformed +Logger.Log("stray } here", 1); // malformed +Logger.Log("{0} {1} {2}", new object[] { 1, 2 }); // counted: 2 < 3 +``` + +### ✅ Not reported + +```csharp +Logger.Log("{0} and {1}", 1, 2); // matches +Logger.Log("no placeholders here"); // no placeholders +Logger.Log("{{not a placeholder}} {0}", 1); // escaped braces +Logger.Log(GetFormat(), 1, 2); // non-constant format +Logger.Log("{0} {1} {2}", existingArray); // params array is opaque +``` + +## How to fix + +- Add the missing argument(s), or remove the offending placeholder. +- For "off-by-one" mistakes, double-check the indices: placeholders are 0-based but it's easy to + read `"{1}"` and reach for the first argument. +- If the format string is dynamically constructed and the analyzer is wrong about a corner case, + suppress at the call site: + + ```csharp + #pragma warning disable EPC41 // dynamically resolved via X + ``` + +## See also + +- [CA2241 - Provide correct arguments to formatting methods](https://learn.microsoft.com/en-us/dotnet/fundamentals/code-analysis/quality-rules/ca2241) + — the BCL-targeted equivalent. EPC41 deliberately omits the BCL list so there is no double-warning. diff --git a/samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs b/samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs new file mode 100644 index 0000000..b0890d4 --- /dev/null +++ b/samples/ErrorProne.Samples/AsyncStuff/TaskReEnumeration.cs @@ -0,0 +1,58 @@ +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; + +namespace ErrorProne.Samples.AsyncStuff +{ + public class TaskReEnumeration + { + // Canonical bug: WhenAll consumes the deferred IEnumerable>; the subsequent foreach + // re-runs Select() and starts a NEW set of tasks that are completely unrelated to the ones we awaited. + public async Task ComputeTotalAsync(IEnumerable ids) + { + var tasks = ids.Select(async id => + { + await Task.Delay(1); + return id * 2; + }); + + await Task.WhenAll(tasks); + + var total = 0; + foreach (var t in tasks) // ❌ EPC38 — second enumeration starts new tasks + { + total += await t; + } + + return total; + } + + // Two LINQ aggregations on the same deferred enumerable: also a re-enumeration bug. + public int CountAndSum(IEnumerable> tasks) + { + var count = tasks.Count(); + var sum = tasks.Sum(t => t.Result); // ❌ EPC38 + return count + sum; + } + + // Fix: materialize the deferred enumerable first so re-iteration is safe. + public async Task ComputeTotalFixedAsync(IEnumerable ids) + { + var tasks = ids.Select(async id => + { + await Task.Delay(1); + return id * 2; + }).ToArray(); + + await Task.WhenAll(tasks); + + var total = 0; + foreach (var t in tasks) // ✅ safe — `tasks` is Task[] + { + total += await t; + } + + return total; + } + } +} diff --git a/samples/ErrorProne.Samples/CoreAnalyzers/PrivateMultipleEnumeration.cs b/samples/ErrorProne.Samples/CoreAnalyzers/PrivateMultipleEnumeration.cs new file mode 100644 index 0000000..18ba5b9 --- /dev/null +++ b/samples/ErrorProne.Samples/CoreAnalyzers/PrivateMultipleEnumeration.cs @@ -0,0 +1,71 @@ +using System.Collections.Generic; +using System.Linq; + +namespace ErrorProne.Samples.CoreAnalyzers +{ + public class PrivateMultipleEnumeration + { + // ❌ Helper enumerates `items` twice. Any private caller passing a deferred query is reported. + private int Helper(IEnumerable items) + { + var c = items.Count(); + return c + items.Sum(); + } + + // ✅ Materialized version — safe to enumerate multiple times. + private int HelperFixed(IReadOnlyList items) + { + var c = items.Count; + var sum = 0; + foreach (var x in items) sum += x; + return c + sum; + } + + static IEnumerable Produce() + { + yield return 1; + yield return 2; + yield return 3; + } + + public void Caller_Where(IEnumerable source) + { + Helper(source.Where(x => x > 0)); // ❌ EPC40 — .Where(...) is deferred + } + + public void Caller_Iterator() + { + Helper(Produce()); // ❌ EPC40 — iterator method + } + + public void Caller_Range() + { + Helper(Enumerable.Range(0, 100)); // ❌ EPC40 — Enumerable.Range + } + + public void Caller_LocalQuery(IEnumerable source) + { + var query = source.Where(x => x > 0); + Helper(query); // ❌ EPC40 — 1-hop provenance back to .Where(...) + } + + public void Caller_TwoHopQuery(IEnumerable source) + { + var step1 = source.Where(x => x > 0); + var step2 = step1; + Helper(step2); // ❌ EPC40 — 2-hop provenance back to .Where(...) + } + + // ✅ Materialized argument — no warning. + public void Caller_List(List items) + { + Helper(items); + } + + // ✅ Materialize the query at the call site. + public void Caller_FixedByMaterialization(IEnumerable source) + { + Helper(source.Where(x => x > 0).ToList()); + } + } +} diff --git a/samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs b/samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs new file mode 100644 index 0000000..2915329 --- /dev/null +++ b/samples/ErrorProne.Samples/CoreAnalyzers/QuadraticEnumeration.cs @@ -0,0 +1,63 @@ +using System.Collections.Generic; +using System.Linq; + +namespace ErrorProne.Samples.CoreAnalyzers +{ + public class QuadraticEnumeration + { + // Q1 — LINQ-enumerating call whose source is the same enumerable being iterated. O(N^2). + public int SameSourceCount(IEnumerable coll) + { + var total = 0; + foreach (var x in coll) + { + var n = coll.Count(); // ❌ EPC39 (Q1) + total += x * n; + } + + return total; + } + + // Q2 — re-enumeration of a deferred IEnumerable inside a loop. Each iteration re-runs Where(). + public int DeferredInLoop(IEnumerable source, int[] outer) + { + IEnumerable q = source.Where(x => x > 0); + var total = 0; + foreach (var i in outer) + { + total += q.Count(); // ❌ EPC39 (Q2) + } + + return total; + } + + // Q4 — nested foreach over the same source. + public int NestedForeachSameSource(List coll) + { + var pairs = 0; + foreach (var x in coll) + { + foreach (var y in coll) // ❌ EPC39 (Q4) + { + if (x + y == 0) pairs++; + } + } + + return pairs; + } + + // ✅ Fixes: + + public int DeferredInLoopFixed(IEnumerable source, int[] outer) + { + var q = source.Where(x => x > 0).ToList(); // materialize once + var total = 0; + foreach (var i in outer) + { + total += q.Count; + } + + return total; + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs b/src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs new file mode 100644 index 0000000..a64ce97 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers.Tests/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzerTests.cs @@ -0,0 +1,283 @@ +using NUnit.Framework; +using System.Threading.Tasks; +using Verify = ErrorProne.NET.TestHelpers.CSharpCodeFixVerifier< + ErrorProne.NET.AsyncAnalyzers.TaskEnumerableReEnumerationAnalyzer, + Microsoft.CodeAnalysis.Testing.EmptyCodeFixProvider>; + +namespace ErrorProne.NET.CoreAnalyzers.Tests.AsyncAnalyzers +{ + [TestFixture] + public class TaskEnumerableReEnumerationAnalyzerTests + { + [Test] + public async Task WarnsOnWhenAllThenForeach() + { + // Canonical bug: WhenAll consumes once, then foreach re-runs the iterator. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + foreach (var t in [|tasks|]) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnDoubleLinqAggregation() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + void Foo(IEnumerable> tasks) { + var first = tasks.Count(); + var second = [|tasks|].Count(); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_SingleEnumeration_WithWhenAll() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_SingleEnumeration_Foreach() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_TaskArray_DoubleEnumeration() + { + // Task[] is materialized — re-enumeration is safe. + var code = @" +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(Task[] tasks) { + await Task.WhenAll(tasks); + foreach (var t in tasks) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_ListOfTask_DoubleEnumeration() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(List tasks) { + await Task.WhenAll(tasks); + foreach (var t in tasks) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_IReadOnlyListOfTask_DoubleEnumeration() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IReadOnlyList tasks) { + await Task.WhenAll(tasks); + foreach (var t in tasks) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_MaterializedViaToArray() + { + // After ToArray, the local has type Task[], so re-enumeration is safe. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }).ToArray(); + await Task.WhenAll(tasks); + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnWhenAnyThenForeach() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAny(tasks); + foreach (var t in [|tasks|]) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnAlias() + { + // var y = tasks introduces an alias; enumerating both should still warn. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable> tasks) { + var y = tasks; + await Task.WhenAll(y); + foreach (var t in [|tasks|]) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_ReassignmentResetsEpoch() + { + // After re-assignment the second enumeration is on a fresh source. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + await Task.WhenAll(tasks); + tasks = ids.Select(async id => { await Task.Delay(1); return id; }); + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_ManualForeachOnly() + { + // A single manual foreach (no WhenAll) over an IEnumerable is fine. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + foreach (var t in tasks) { await t; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnDoubleAwaitForeach() + { + // Two manual await-foreaches on the same IEnumerable — strict-superset case. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + foreach (var t in tasks) { await t; } + foreach (var t in [|tasks|]) { _ = t.Status; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnValueTaskEnumerable() + { + // IEnumerable exhibits the same hazard. + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable ids) { + var tasks = ids.Select(async id => { await Task.Delay(1); }); + foreach (var t in tasks) { await t; } + foreach (var t in [|tasks|]) { _ = t.IsCompleted; } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_NestedLambdaIndependentScope() + { + // The lambda body has its own block analysis; re-using the parameter inside the lambda once + // does NOT combine with the outer enumeration to form a 2nd site. + var code = @" +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable tasks) { + Action a = () => { foreach (var t in tasks) { } }; + a(); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task WarnsOnLinqAggregationAfterWhenAll() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +class Test { + async Task Foo(IEnumerable> tasks) { + await Task.WhenAll(tasks); + return [|tasks|].Sum(t => t.Result); + } +}"; + await Verify.VerifyAsync(code); + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/FormatMethodArgumentsAnalyzerTests.cs b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/FormatMethodArgumentsAnalyzerTests.cs new file mode 100644 index 0000000..f6082d0 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/FormatMethodArgumentsAnalyzerTests.cs @@ -0,0 +1,337 @@ +using System.Threading.Tasks; +using ErrorProne.NET.TestHelpers; +using Microsoft.CodeAnalysis.CSharp; +using Microsoft.CodeAnalysis.Testing; +using NUnit.Framework; +using Verify = ErrorProne.NET.TestHelpers.CSharpCodeFixVerifier< + ErrorProne.NET.CoreAnalyzers.FormatMethodArgumentsAnalyzer, + Microsoft.CodeAnalysis.Testing.EmptyCodeFixProvider>; + +namespace ErrorProne.NET.CoreAnalyzers.Tests.CoreAnalyzers +{ + [TestFixture] + public class FormatMethodArgumentsAnalyzerTests + { + private const string LoggerStub = @" +namespace MyCorp.Logging { + public static class Logger { + public static void Log(string format, params object[] args) { } + public static void LogWithProvider(System.IFormatProvider provider, string format, params object[] args) { } + public static void LogTwo(string format, object a, object b) { } + public static void LogNoArgs(string format) { } + } +} +"; + + private static Task RunAsync(string source, string editorConfig, params DiagnosticResult[] expected) + { + var test = new Verify.Test + { + LanguageVersion = LanguageVersion.Latest, + TestState = + { + Sources = { source, LoggerStub }, + AnalyzerConfigFiles = { ("/.editorconfig", editorConfig) }, + }, + }.WithoutGeneratedCodeVerification(); + + foreach (var diag in expected) + { + test.ExpectedDiagnostics.Add(diag); + } + + return test.RunAsync(); + } + + private static string EditorConfig(string formatMethods) => +$@"root = true +[*.cs] +dotnet_diagnostic.epc41.format_methods = {formatMethods} +"; + + [Test] + public async Task NoWarn_WhenNotConfigured() + { + // No editorconfig entries → the rule is dormant; even a clearly-broken call is silent. + var code = @" +using MyCorp.Logging; +class C { void M() { Logger.Log(""{0} {1}"", 1); } }"; + + await RunAsync(code, editorConfig: ""); + } + + [Test] + public async Task NoWarn_WhenMethodNotInAllowlist() + { + // The configured method name does not match the called method → no warning. + var code = @" +using MyCorp.Logging; +class C { void M() { Logger.Log(""{0} {1}"", 1); } }"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.LogTwo:0")); + } + + [Test] + public async Task Warn_PlaceholderIndexExceedsArgCount() + { + // Format references {0} and {1} but only one arg supplied → warn. + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.Log(""{0} {1}"", 1)|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task Warn_ZeroArgsButPlaceholderUsed() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.Log(""value = {0}"")|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task NoWarn_WhenAllPlaceholdersHaveArgs() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log(""{0} and {1}"", 1, 2); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task NoWarn_WhenFormatHasNoPlaceholders() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log(""no placeholders here""); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task NoWarn_OnEscapedBraces() + { + // '{{' / '}}' are literal braces, not placeholders. + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log(""{{not a placeholder}} {0}"", 42); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task Warn_OnMalformedFormat_UnbalancedOpenBrace() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log({|EPC41:""unterminated {0""|}, 1); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task Warn_OnMalformedFormat_StrayCloseBrace() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log({|EPC41:""stray } here""|}, 1); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task NoWarn_WhenFormatIsNotConstant() + { + // Runtime-built format strings cannot be analyzed. + var code = @" +using MyCorp.Logging; +class C { + void M(string fmt) { + Logger.Log(fmt, 1, 2); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task NoWarn_WhenParamsArrayIsOpaque() + { + // Caller passes an existing object[] variable; we cannot count statically → no report. + var code = @" +using MyCorp.Logging; +class C { + void M(object[] xs) { + Logger.Log(""{0} {1} {2}"", xs); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task Warn_WithExplicitArrayLiteral_TooFewElements() + { + // Explicit `new object[] { ... }` literal — we can count. + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.Log(""{0} {1} {2}"", new object[] { 1, 2 })|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task Warn_WithFormatProviderOverload_FormatIndex1() + { + // Format param is at index 1 (after IFormatProvider). + var code = @" +using System.Globalization; +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.LogWithProvider(CultureInfo.InvariantCulture, ""{0} {1}"", 1)|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.LogWithProvider:1")); + } + + [Test] + public async Task Warn_WithFixedTrailingParams_NotParamsArray() + { + // Method signature uses two fixed object parameters (no params array). + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.LogTwo(""{0} {1} {2}"", 1, 2)|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.LogTwo:0")); + } + + [Test] + public async Task Warn_WithLogNoArgs_OverloadHasNoArgs() + { + // Single-parameter format method: any placeholder is automatically out of range. + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.LogNoArgs(""hello {0}"")|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.LogNoArgs:0")); + } + + [Test] + public async Task NoWarn_FormatItemWithAlignmentAndSpec_InRange() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log(""[{0,10:N2}] [{1,-5}]"", 3.14, ""x""); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0")); + } + + [Test] + public async Task MultipleEntries_OneMatches() + { + // Several entries in config — pick the right one for the called method. + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.Log(""{0}"")|}; + Logger.LogTwo(""{0} {1}"", 1, 2); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:0;MyCorp.Logging.Logger.LogTwo:0")); + } + + [Test] + public async Task Warn_WhenPatternMatchesMethod() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.Log(""{0} {1}"", 1)|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log*:0")); + } + + [Test] + public async Task Warn_WhenNamedParameterUsed() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + {|EPC41:Logger.Log(format: ""{0} {1}"", args: new object[] { 1 })|}; + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.Log:format")); + } + + [Test] + public async Task NoWarn_WhenPatternDoesNotMatch() + { + var code = @" +using MyCorp.Logging; +class C { + void M() { + Logger.Log(""{0} {1}"", 1); + } +}"; + + await RunAsync(code, EditorConfig("MyCorp.Logging.Logger.LogTwo:0")); + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzerTests.cs b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzerTests.cs new file mode 100644 index 0000000..eba2275 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzerTests.cs @@ -0,0 +1,355 @@ +using NUnit.Framework; +using System.Threading.Tasks; +using Verify = ErrorProne.NET.TestHelpers.CSharpCodeFixVerifier< + ErrorProne.NET.CoreAnalyzers.PrivateMethodMultipleEnumerationAnalyzer, + Microsoft.CodeAnalysis.Testing.EmptyCodeFixProvider>; + +namespace ErrorProne.NET.CoreAnalyzers.Tests.CoreAnalyzers +{ + [TestFixture] + public class PrivateMethodMultipleEnumerationAnalyzerTests + { + [Test] + public async Task Warns_OnLinqWhereChain() + { + // Canonical case: private helper enumerates `items` twice; caller passes `.Where(...)`. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + Helper([|source.Where(x => x > 0)|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_OnSelectChain() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + foreach (var x in items) { c += x; } + return c; + } + void Caller(IEnumerable source) { + Helper([|source.Select(x => x * 2)|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_OnEnumerableRange() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var a = items.Count(); + var b = items.Sum(); + return a + b; + } + void Caller() { + Helper([|System.Linq.Enumerable.Range(0, 10)|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_OnIteratorMethodResult() + { + // Caller passes the result of an iterator method (yield-based). + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + static IEnumerable Produce() { + yield return 1; + yield return 2; + } + void Caller() { + Helper([|Produce()|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_ThroughOneLocalHop() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + var query = source.Where(x => x > 0); + Helper([|query|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_ThroughTwoLocalHops() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + var step1 = source.Where(x => x > 0); + var step2 = step1; + Helper([|step2|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnList() + { + // Caller passes a materialized list; no re-enumeration penalty. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(List source) { + Helper(source); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnArray() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(int[] source) { + Helper(source); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnToListedQuery() + { + // Materialized — .ToList() breaks the deferred chain. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + Helper(source.Where(x => x > 0).ToList()); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnIEnumerableParameterReference() + { + // Caller forwards its own IEnumerable parameter — provenance terminates at the parameter + // and we cannot conclude the source is a query. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + Helper(source); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnPublicHelper() + { + // Public helper — open world; closed-world assumption invalid; rule does not fire. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + public int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + Helper(source.Where(x => x > 0)); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnInternalHelper() + { + // V1 scope is private only; internal helpers are not tracked. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + internal int Helper(IEnumerable items) { + var c = items.Count(); + return c + items.Sum(); + } + void Caller(IEnumerable source) { + Helper(source.Where(x => x > 0)); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnSingleEnumerationInsideHelper() + { + // Helper enumerates `items` only once — not a candidate. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + return items.Count(); + } + void Caller(IEnumerable source) { + Helper(source.Where(x => x > 0)); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_WhenCallerCountExceedsCap() + { + // 4 distinct callers — past the cap of 3. Conservative: skip. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + return items.Count() + items.Sum(); + } + void A(IEnumerable s) { Helper(s.Where(x => x > 0)); } + void B(IEnumerable s) { Helper(s.Where(x => x > 0)); } + void C(IEnumerable s) { Helper(s.Where(x => x > 0)); } + void D(IEnumerable s) { Helper(s.Where(x => x > 0)); } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_MixedCallers_OnlyOffendingOneReported() + { + // Two callers; one passes a query, one passes a list. Report only the query caller. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + return items.Count() + items.Sum(); + } + void Good(List s) { Helper(s); } + void Bad(IEnumerable s) { Helper([|s.Where(x => x > 0)|]); } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_OnForeachAndSumPattern() + { + // Helper enumerates `items` via foreach AND a LINQ call. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + var s = items.Sum(); + foreach (var x in items) { s += x; } + return s; + } + void Caller(IEnumerable source) { + Helper([|source.Select(x => x + 1)|]); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnUnclassifiedArgument() + { + // Argument is a property access — we can't classify it as a query. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private System.Collections.Generic.IEnumerable Source { get; set; } = new int[] { 1, 2, 3 }; + private int Helper(IEnumerable items) { + return items.Count() + items.Sum(); + } + void Caller() { + Helper(Source); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Warns_ThreeCallers_AllReported() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + private int Helper(IEnumerable items) { + return items.Count() + items.Sum(); + } + void A(IEnumerable s) { Helper([|s.Where(x => x > 0)|]); } + void B(IEnumerable s) { Helper([|s.Where(x => x > 0)|]); } + void C(IEnumerable s) { Helper([|s.Where(x => x > 0)|]); } +}"; + await Verify.VerifyAsync(code); + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs new file mode 100644 index 0000000..b8de13e --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers.Tests/CoreAnalyzers/QuadraticEnumerationAnalyzerTests.cs @@ -0,0 +1,301 @@ +using NUnit.Framework; +using System.Threading.Tasks; +using Verify = ErrorProne.NET.TestHelpers.CSharpCodeFixVerifier< + ErrorProne.NET.CoreAnalyzers.QuadraticEnumerationAnalyzer, + Microsoft.CodeAnalysis.Testing.EmptyCodeFixProvider>; + +namespace ErrorProne.NET.CoreAnalyzers.Tests.CoreAnalyzers +{ + [TestFixture] + public class QuadraticEnumerationAnalyzerTests + { + // -------- Q1: Same-symbol nested enumeration --------------------------------------------------- + + [Test] + public async Task Q1_Warns_OnEnumerableCount_WithSameSource() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable coll) { + foreach (var x in coll) { + var n = [|coll.Count()|]; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q1_Warns_OnAny_WithSameSource() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable coll) { + foreach (var x in coll) { + if ([|coll.Any(y => y > x)|]) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + // -------- Q2: Deferred re-enumeration in a loop ------------------------------------------------ + + [Test] + public async Task Q2_Warns_OnDeferredLocalInsideLoop() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + IEnumerable q = source.Where(x => x > 0); + foreach (var i in outer) { + var c = [|q.Count()|]; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenLocalIsList() + { + // Same shape as Q2 but the local is concretely typed as List — no deferred re-enum. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(int[] outer) { + List q = new List { 1, 2, 3 }; + foreach (var i in outer) { + var c = q.Count; + } + } +}"; + await Verify.VerifyAsync(code); + } + + // -------- Q4: Nested foreach over the same source ---------------------------------------------- + + [Test] + public async Task Q4_Warns_OnNestedForeachOverSameLocal() + { + var code = @" +using System.Collections.Generic; +class Test { + void Foo(List coll) { + foreach (var x in coll) { + foreach (var y in [|coll|]) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q4_NoWarn_OnNestedForeachOverDistinctSource() + { + var code = @" +using System.Collections.Generic; +class Test { + void Foo(List a, List b) { + foreach (var x in a) { + foreach (var y in b) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + // -------- Negative / boundary cases ------------------------------------------------------------ + + [Test] + public async Task NoWarn_OnSingleForeachNoInner() + { + var code = @" +using System.Collections.Generic; +class Test { + void Foo(IEnumerable coll) { + foreach (var x in coll) { } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnEnumerableCount_OutsideLoop() + { + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable coll) { + var n = coll.Count(); + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnForLoopUsingArrayLength() + { + // for (var i = 0; i < arr.Length; i++) — arr.Length is a property, not a method call. + var code = @" +class Test { + void Foo(int[] arr) { + for (var i = 0; i < arr.Length; i++) { + var v = arr[i]; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NoWarn_OnListContainsInsideLoop_DifferentSource() + { + // After the v2 tightening EPC39 only flags re-iteration of the *same* enumerable. A generic + // "List.Contains called in a loop on a different collection" pattern is no longer reported, + // even though it is technically O(N·M) — too noisy in real codebases. + var code = @" +using System.Collections.Generic; +class Test { + void Foo(List haystack, IEnumerable needles) { + foreach (var n in needles) { + if (haystack.Contains(n)) { } + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_Warns_InsideWhileLoop() + { + // EPC39 fires for any loop kind, not just foreach. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int n) { + IEnumerable q = source.Where(x => x > 0); + var i = 0; + while (i < n) { + var c = [|q.Count()|]; + i++; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_AfterMaterialization() + { + // Materialize once before the loop — re-enumeration is now safe. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + var q = source.Where(x => x > 0).ToList(); + foreach (var i in outer) { + var c = q.Count; + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenDeferredLocalIsDeclaredInsideLoopBody() + { + // The deferred IEnumerable local is declared *inside* the loop body — a fresh enumerable is + // bound every iteration, so multiple enumerations within a single iteration do NOT compound + // with the outer loop size (it is a CA1851-style concern, not quadratic-in-the-loop). + var code = @" +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +class Test { + void Foo(IEnumerable types) { + foreach (var type in types) { + IEnumerable attrs = type.GetCustomAttributes(); + var a = attrs.Any(x => x.GetType() == typeof(ObsoleteAttribute)); + var b = attrs.Any(x => x.GetType() == typeof(FlagsAttribute)); + var c = attrs.Any(x => x.GetType() == typeof(SerializableAttribute)); + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenDeferredLocalIsDeclaredInsideLoopBody_SingleEnumeration() + { + // Even a single enumeration on a deferred local declared inside the loop should not fire: + // there is no outer-loop-multiplier to amortize away by hoisting. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + foreach (var i in outer) { + IEnumerable q = source.Where(x => x > i); + var c = q.Count(); + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task Q2_NoWarn_WhenDeferredLocalIsDeclaredInsideNestedBlockOfLoopBody() + { + // A local declared inside an inner `if` block (still syntactically inside the loop body) is + // also fresh per iteration — must not warn. + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, int[] outer) { + foreach (var i in outer) { + if (i > 0) { + IEnumerable q = source.Where(x => x > i); + var a = q.Any(); + var b = q.Count(); + } + } + } +}"; + await Verify.VerifyAsync(code); + } + + [Test] + public async Task NestedLoops_OnlyInnerReports() + { + // Re-enumerating a deferred IEnumerable inside an inner loop should report ONCE (against the + // inner loop's body), not twice (once for the outer and once for the inner). + var code = @" +using System.Collections.Generic; +using System.Linq; +class Test { + void Foo(IEnumerable source, IEnumerable a, IEnumerable b) { + IEnumerable q = source.Where(x => x > 0); + foreach (var x in a) { + foreach (var y in b) { + var c = [|q.Count()|]; + } + } + } +}"; + await Verify.VerifyAsync(code); + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md b/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md index e6c007f..9d8592b 100644 --- a/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md +++ b/src/ErrorProne.NET.CoreAnalyzers/AnalyzerReleases.Unshipped.md @@ -29,6 +29,10 @@ EPC34 | ErrorHandling | Warning | MustUseResultAnalyzer EPC35 | Async | Info | DoNotBlockUnnecessarilyInAsyncMethodsAnalyzer EPC36 | Async | Info | DoNotUseAsyncDelegatesForLongRunningTasksAnalyzer EPC37 | Async | Info | DoNotValidateArgumentsInAsyncMethodsAnalyzer +EPC38 | Async | Disabled | TaskEnumerableReEnumerationAnalyzer +EPC39 | Performance | Disabled | QuadraticEnumerationAnalyzer +EPC40 | Performance | Disabled | PrivateMethodMultipleEnumerationAnalyzer +EPC41 | ErrorHandling | Warning | FormatMethodArgumentsAnalyzer ERP021 | ErrorHandling | Warning | ThrowExAnalyzer ERP022 | ErrorHandling | Warning | SwallowAllExceptionsAnalyzer ERP031 | Concurrency | Warning | ConcurrentCollectionAnalyzer diff --git a/src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs b/src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs new file mode 100644 index 0000000..8fe6e8c --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/AsyncAnalyzers/TaskEnumerableReEnumerationAnalyzer.cs @@ -0,0 +1,411 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System.Collections.Generic; +using System.Collections.Immutable; +using ErrorProne.NET.Core; +using ErrorProne.NET.CoreAnalyzers; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.AsyncAnalyzers +{ + /// + /// EPC38 — detects when a deferred IEnumerable<Task> / IEnumerable<Task<T>> + /// is observed (enumerated) more than once inside the same method body. + /// + /// + /// Every enumeration of such a sequence executes the underlying producer again — e.g. + /// tasks.Select(async x => ...) creates a fresh task per element on each pass. Patterns like + /// await Task.WhenAll(tasks); foreach (var t in tasks) { ... } silently restart the work and + /// observe a different set of tasks the second time, which is almost always a bug. + /// + /// + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class TaskEnumerableReEnumerationAnalyzer : DiagnosticAnalyzerBase + { + /// + public static readonly DiagnosticDescriptor Rule = DiagnosticDescriptors.EPC38; + + /// + public override ImmutableArray SupportedDiagnostics => ImmutableArray.Create(Rule); + + /// + public TaskEnumerableReEnumerationAnalyzer() + : base(Rule) + { + } + + /// + protected override void InitializeCore(AnalysisContext context) + { + context.RegisterOperationBlockAction(AnalyzeOperationBlock); + } + + private static void AnalyzeOperationBlock(OperationBlockAnalysisContext context) + { + // Per-block tracking: for each tracked symbol record its assignment epoch and enumeration sites + // in syntactic order, then report any non-first enumeration within the same epoch. + // + // We treat lambdas / local functions as opaque (their bodies have their own block events). + var trackers = new Dictionary(SymbolEqualityComparer.Default); + + foreach (var block in context.OperationBlocks) + { + CollectSites(block, trackers, context); + } + + ReportSites(trackers, context); + } + + private static void CollectSites( + IOperation root, + Dictionary trackers, + OperationBlockAnalysisContext context) + { + foreach (var op in root.EnumerateChildOperations()) + { + // Don't descend into nested lambdas / local functions — they'll get their own block event. + if (IsInsideNestedFunction(op, root)) + { + continue; + } + + TryRecordAssignment(op, trackers, context.Compilation); + TryRecordEnumeration(op, trackers, context.Compilation); + } + } + + private static bool IsInsideNestedFunction(IOperation op, IOperation root) + { + // An operation that has a different containing local function / lambda than the block root + // is part of a nested function and will be analyzed by its own block-action invocation. + for (var current = op.Parent; current is not null && current != root; current = current.Parent) + { + if (current is IAnonymousFunctionOperation || current is ILocalFunctionOperation) + { + return true; + } + } + + return false; + } + + /// + /// Records that a tracked symbol gained a *new* value, ending its previous "enumeration epoch". + /// Aliasing (var y = x;) is also tracked: if the right-hand side is a reference to another + /// tracked symbol, the two share the same epoch. + /// + private static void TryRecordAssignment( + IOperation op, + Dictionary trackers, + Compilation compilation) + { + switch (op) + { + case IVariableDeclaratorOperation declarator: + { + var symbol = declarator.Symbol; + if (!IsTrackable(symbol.Type, compilation)) + { + return; + } + + var initializer = declarator.Initializer?.Value + ?? (declarator.Parent as IVariableDeclarationOperation)?.Initializer?.Value; + + var tracker = GetOrCreate(trackers, symbol); + var aliasRoot = EnumerationMethods.TryGetRootEnumerableSymbol(initializer, compilation); + if (aliasRoot is not null + && !SymbolEqualityComparer.Default.Equals(aliasRoot, symbol) + && IsTrackable(GetSymbolType(aliasRoot), compilation)) + { + // Right-hand side is itself a trackable enumerable (a parameter or another local). + // Make the two share a tracker so that subsequent enumerations on either name accrue + // toward the same set of sites. + var aliasTarget = GetOrCreate(trackers, aliasRoot); + tracker.AliasTo(aliasTarget); + } + else + { + tracker.NewEpoch(op.Syntax.SpanStart); + } + + return; + } + + case ISimpleAssignmentOperation assignment: + { + var rootSymbol = EnumerationMethods.TryGetRootEnumerableSymbol(assignment.Target, compilation); + if (rootSymbol is null || !IsTrackable(GetSymbolType(rootSymbol), compilation)) + { + return; + } + + var tracker = GetOrCreate(trackers, rootSymbol); + tracker.NewEpoch(op.Syntax.SpanStart); + return; + } + } + } + + /// + /// Records that an enumeration of a tracked symbol happens at the given operation site. + /// + private static void TryRecordEnumeration( + IOperation op, + Dictionary trackers, + Compilation compilation) + { + switch (op) + { + case IForEachLoopOperation foreachLoop: + RecordIfTracked( + foreachLoop.Collection, + trackers, + compilation, + foreachLoop.Collection.Syntax.GetLocation()); + return; + + case IInvocationOperation invocation: + { + var target = invocation.TargetMethod; + + // Task aggregation methods: Task.WhenAll(IEnumerable) and friends. The first + // positional argument is the enumerable being observed. + if (EnumerationMethods.IsTaskAggregationMethod(target, compilation)) + { + if (invocation.Arguments.Length > 0) + { + var argValue = invocation.Arguments[0].Value; + RecordIfTracked(argValue, trackers, compilation, argValue.Syntax.GetLocation()); + } + + return; + } + + // LINQ enumerating methods (Count, ToList, Sum, …) — receiver is the source. + if (EnumerationMethods.IsEnumeratingLinqMethod(target, compilation)) + { + // For instance / reduced-extension calls the source is the receiver. + var source = invocation.Instance ?? (invocation.Arguments.Length > 0 ? invocation.Arguments[0].Value : null); + if (source is not null) + { + RecordIfTracked(source, trackers, compilation, source.Syntax.GetLocation()); + } + + return; + } + + return; + } + } + } + + private static void RecordIfTracked( + IOperation? sourceOperation, + Dictionary trackers, + Compilation compilation, + Location reportLocation) + { + if (sourceOperation is null) + { + return; + } + + var rootSymbol = EnumerationMethods.TryGetRootEnumerableSymbol(sourceOperation, compilation); + if (rootSymbol is null) + { + return; + } + + var symbolType = GetSymbolType(rootSymbol); + if (!IsTrackable(symbolType, compilation)) + { + return; + } + + var tracker = GetOrCreate(trackers, rootSymbol); + tracker.AddEnumeration(sourceOperation.Syntax.SpanStart, reportLocation, rootSymbol.Name); + } + + private static void ReportSites( + Dictionary trackers, + OperationBlockAnalysisContext context) + { + // Each tracker is a separate logical sequence (post-alias resolution we use the *root* of the + // alias chain so multiple aliases collapse into one). We report every enumeration after the first + // in any epoch that has more than one site. + var roots = new HashSet(); + foreach (var tracker in trackers.Values) + { + roots.Add(tracker.Resolve()); + } + + foreach (var root in roots) + { + root.ReportRepeatedSites(context, Rule); + } + } + + private static bool IsTrackable(ITypeSymbol? type, Compilation compilation) + { + return EnumerationMethods.IsTaskEnumerableType(type, compilation); + } + + private static ITypeSymbol? GetSymbolType(ISymbol symbol) + { + return symbol switch + { + ILocalSymbol l => l.Type, + IParameterSymbol p => p.Type, + _ => null, + }; + } + + private static SymbolTracker GetOrCreate(Dictionary trackers, ISymbol symbol) + { + if (!trackers.TryGetValue(symbol, out var tracker)) + { + tracker = new SymbolTracker(symbol); + trackers[symbol] = tracker; + } + + return tracker; + } + + /// + /// Per-symbol enumeration history. Records all assignment positions and all enumeration sites + /// (regardless of visit order), then groups sites into "epochs" delimited by assignment positions + /// at report time. Multiple aliased symbols share a single underlying tracker via + /// / . + /// + private sealed class SymbolTracker + { + private readonly ISymbol _symbol; + private SymbolTracker? _aliasTarget; + + private readonly List _assignmentPositions = new(); + private readonly List _sites = new(); + + public SymbolTracker(ISymbol symbol) => _symbol = symbol; + + public SymbolTracker Resolve() + { + var current = this; + while (current._aliasTarget is not null) + { + current = current._aliasTarget; + } + + return current; + } + + public void AliasTo(SymbolTracker target) + { + var resolvedTarget = target.Resolve(); + if (ReferenceEquals(resolvedTarget, this)) + { + return; + } + + // Migrate any sites/assignments that were recorded on this tracker (e.g. discovered before + // the alias relationship was established) onto the target tracker. + resolvedTarget._sites.AddRange(_sites); + resolvedTarget._assignmentPositions.AddRange(_assignmentPositions); + _sites.Clear(); + _assignmentPositions.Clear(); + + _aliasTarget = resolvedTarget; + } + + public void NewEpoch(int positionStart) + { + Resolve()._assignmentPositions.Add(positionStart); + } + + public void AddEnumeration(int positionStart, Location location, string symbolName) + { + Resolve()._sites.Add(new Site(positionStart, location, symbolName)); + } + + public void ReportRepeatedSites(OperationBlockAnalysisContext context, DiagnosticDescriptor rule) + { + if (_sites.Count < 2) + { + return; + } + + _sites.Sort(static (a, b) => a.Position.CompareTo(b.Position)); + _assignmentPositions.Sort(); + + // Group sites by epoch (= run between consecutive assignment positions). + var epochStart = 0; + while (epochStart < _sites.Count) + { + var epochEnd = FindEpochEnd(epochStart); + if (epochEnd - epochStart >= 2) + { + // Report every site in this epoch after the first one. + for (var i = epochStart + 1; i < epochEnd; i++) + { + var site = _sites[i]; + context.ReportDiagnostic(Diagnostic.Create(rule, site.Location, site.SymbolName)); + } + } + + epochStart = epochEnd; + } + } + + private int FindEpochEnd(int epochStart) + { + // The epoch containing _sites[epochStart] runs up to (but not including) the first site whose + // position is on the other side of an assignment with respect to _sites[epochStart]. + var startPos = _sites[epochStart].Position; + var firstBoundary = -1; + foreach (var assignment in _assignmentPositions) + { + if (assignment > startPos) + { + firstBoundary = assignment; + break; + } + } + + if (firstBoundary < 0) + { + return _sites.Count; + } + + for (var i = epochStart + 1; i < _sites.Count; i++) + { + if (_sites[i].Position >= firstBoundary) + { + return i; + } + } + + return _sites.Count; + } + + private readonly struct Site + { + public int Position { get; } + public Location Location { get; } + public string SymbolName { get; } + + public Site(int position, Location location, string symbolName) + { + Position = position; + Location = location; + SymbolName = symbolName; + } + } + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/FormatMethodArgumentsAnalyzer.cs b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/FormatMethodArgumentsAnalyzer.cs new file mode 100644 index 0000000..f730441 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/FormatMethodArgumentsAnalyzer.cs @@ -0,0 +1,441 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System; +using System.Collections.Concurrent; +using System.Collections.Immutable; +using ErrorProne.NET.CoreAnalyzers; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.CoreAnalyzers +{ + /// + /// EPC41 — for methods marked as formatting methods via the + /// dotnet_diagnostic.EPC41.format_methods .editorconfig option, validates that + /// the format string passed at the configured parameter index references only argument indices + /// that are actually supplied. Catches the same class of bug as CA2241 (which would throw + /// at runtime) but for libraries the user does not own. + /// + /// + /// Configuration syntax: + /// + /// dotnet_diagnostic.EPC41.format_methods = + /// MyCorp.Logging.Logger.LogInfo:0; + /// MyCorp.Logging.Logger.LogError:1 + /// + /// Each entry is FullyQualifiedTypeName.MethodName:formatParamIndex. The match is by + /// name only — every overload of that name is treated as a formatting method. + /// + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class FormatMethodArgumentsAnalyzer : DiagnosticAnalyzerBase + { + /// + public static readonly DiagnosticDescriptor Rule = DiagnosticDescriptors.EPC41; + + /// + public const string FormatMethodsOptionKey = "dotnet_diagnostic.epc41.format_methods"; + + /// + public override ImmutableArray SupportedDiagnostics => ImmutableArray.Create(Rule); + + /// + public FormatMethodArgumentsAnalyzer() + : base(Rule) + { + } + + // Used to format a containing-type for matching against the editorconfig entries. Produces + // e.g. "System.String" / "MyCorp.Logger" (no `global::`, no special keyword names like `string`). + private static readonly SymbolDisplayFormat ContainingTypeFormat = new SymbolDisplayFormat( + typeQualificationStyle: SymbolDisplayTypeQualificationStyle.NameAndContainingTypesAndNamespaces); + + /// + protected override void InitializeCore(AnalysisContext context) + { + context.RegisterCompilationStartAction(OnCompilationStart); + } + + private static void OnCompilationStart(CompilationStartAnalysisContext context) + { + var registry = new FormatMethodRegistry(); + + context.RegisterOperationAction(operationContext => + { + var invocation = (IInvocationOperation)operationContext.Operation; + var syntaxTree = invocation.Syntax.SyntaxTree; + var options = operationContext.Options.AnalyzerConfigOptionsProvider.GetOptions(syntaxTree); + + if (!registry.TryGetFormatStringParamIndex(invocation.TargetMethod, options, out var formatParamIndex)) + { + return; + } + + AnalyzeInvocation(operationContext, invocation, formatParamIndex); + }, OperationKind.Invocation); + } + + private static void AnalyzeInvocation( + OperationAnalysisContext context, + IInvocationOperation invocation, + int formatParamIndex) + { + // invocation.Arguments is sorted by parameter ordinal, so we can index by it directly + // whether the caller used positional or named arguments. + if (formatParamIndex < 0 || formatParamIndex >= invocation.Arguments.Length) + { + return; + } + + AnalyzeFormatString(context, invocation, invocation.Arguments[formatParamIndex]); + } + + private static void AnalyzeFormatString(OperationAnalysisContext context, IInvocationOperation invocation, IArgumentOperation formatArgument) + { + var constant = formatArgument.Value.ConstantValue; + if (!constant.HasValue || constant.Value is not string formatString) + { + // Not a compile-time constant string — we cannot reason about it. + return; + } + + if (formatArgument.Parameter is null + || !TryCountTrailingArgs(invocation, formatArgument.Parameter.Ordinal, out var argCount)) + { + // Caller passed e.g. an opaque object[] for the params slot; cannot reason statically. + return; + } + + var parseResult = FormatStringParser.Parse(formatString); + if (parseResult.IsMalformed) + { + context.ReportDiagnostic(Diagnostic.Create( + Rule, + formatArgument.Syntax.GetLocation(), + $"Format string is malformed near index {parseResult.MalformedAt}: '{Truncate(formatString, parseResult.MalformedAt)}'")); + return; + } + + if (parseResult.MaxIndex >= argCount) + { + context.ReportDiagnostic(Diagnostic.Create( + Rule, + invocation.Syntax.GetLocation(), + $"Format placeholder {{{parseResult.MaxIndex}}} references argument index {parseResult.MaxIndex}, but only {argCount} argument(s) supplied; the call would throw FormatException at runtime.")); + } + } + + private static bool TryCountTrailingArgs( + IInvocationOperation invocation, + int formatParamIndex, + out int argCount) + { + argCount = 0; + for (var i = formatParamIndex + 1; i < invocation.Arguments.Length; i++) + { + var arg = invocation.Arguments[i]; + var parameter = arg.Parameter; + + if (parameter is { IsParams: true }) + { + // Roslyn wraps individually-passed params args into an implicit array creation. + if (arg.Value is IArrayCreationOperation { IsImplicit: true, Initializer: { } init }) + { + argCount += init.ElementValues.Length; + continue; + } + + // Caller passed a single array expression directly. If it's an explicit array + // literal we can still count its elements; otherwise bail out. + if (arg.Value is IArrayCreationOperation { Initializer: { } explicitInit }) + { + argCount += explicitInit.ElementValues.Length; + continue; + } + + return false; + } + + argCount++; + } + + return true; + } + + private static string Truncate(string s, int around) + { + const int window = 12; + var start = Math.Max(0, around - window); + var end = Math.Min(s.Length, around + window); + return s.Substring(start, end - start); + } + + /// + /// Lazily parses dotnet_diagnostic.EPC41.format_methods values and caches the + /// resulting entry table keyed by the raw editorconfig string. + /// + /// + /// Entry syntax: FullyQualifiedType.MethodName[*]:(index|paramName). A trailing + /// * on the method name turns the entry into a prefix match against the called + /// method's name. The spec after : is either a zero-based positional index, or the + /// name of the format-string parameter on the resolved method. + /// + private sealed class FormatMethodRegistry + { + private readonly ConcurrentDictionary _cache = + new ConcurrentDictionary(StringComparer.Ordinal); + + public bool TryGetFormatStringParamIndex( + IMethodSymbol method, + AnalyzerConfigOptions options, + out int formatParamIndex) + { + formatParamIndex = -1; + + if (!options.TryGetValue(FormatMethodsOptionKey, out var raw) || string.IsNullOrWhiteSpace(raw)) + { + return false; + } + + var entries = _cache.GetOrAdd(raw, Parse); + if (entries.IsEmpty) + { + return false; + } + + var containing = method.ContainingType?.ToDisplayString(ContainingTypeFormat); + if (containing == null) + { + return false; + } + + // Exact match takes precedence over wildcard match. + var exactKey = containing + "." + method.Name; + if (entries.Exact.TryGetValue(exactKey, out var spec) + && TryResolveSpec(method, spec, out formatParamIndex)) + { + return true; + } + + foreach (var wildcard in entries.Wildcards) + { + if (!string.Equals(wildcard.Containing, containing, StringComparison.Ordinal)) + { + continue; + } + + if (!method.Name.StartsWith(wildcard.Prefix, StringComparison.Ordinal)) + { + continue; + } + + if (TryResolveSpec(method, wildcard.Spec, out formatParamIndex)) + { + return true; + } + } + + return false; + } + + private static bool TryResolveSpec(IMethodSymbol method, string spec, out int formatParamIndex) + { + if (int.TryParse(spec, out formatParamIndex)) + { + return formatParamIndex >= 0 && formatParamIndex < method.Parameters.Length; + } + + for (var i = 0; i < method.Parameters.Length; i++) + { + if (string.Equals(method.Parameters[i].Name, spec, StringComparison.Ordinal)) + { + formatParamIndex = i; + return true; + } + } + + formatParamIndex = -1; + return false; + } + + private static ParsedEntries Parse(string raw) + { + var exact = ImmutableDictionary.CreateBuilder(StringComparer.Ordinal); + var wildcards = ImmutableArray.CreateBuilder<(string Containing, string Prefix, string Spec)>(); + + foreach (var rawEntry in raw.Split(EntrySeparators, StringSplitOptions.RemoveEmptyEntries)) + { + var entry = rawEntry.Trim(); + if (entry.Length == 0) + { + continue; + } + + var colon = entry.LastIndexOf(':'); + if (colon <= 0 || colon == entry.Length - 1) + { + continue; + } + + var name = entry.Substring(0, colon).Trim(); + var spec = entry.Substring(colon + 1).Trim(); + if (name.Length == 0 || spec.Length == 0) + { + continue; + } + + if (name.EndsWith("*", StringComparison.Ordinal)) + { + var withoutStar = name.Substring(0, name.Length - 1); + var dot = withoutStar.LastIndexOf('.'); + if (dot <= 0 || dot == withoutStar.Length - 1) + { + // Wildcard without a method-name segment (e.g. "Foo.*") — skip. + continue; + } + + var containing = withoutStar.Substring(0, dot); + var prefix = withoutStar.Substring(dot + 1); + wildcards.Add((containing, prefix, spec)); + } + else + { + exact[name] = spec; + } + } + + return new ParsedEntries(exact.ToImmutable(), wildcards.ToImmutable()); + } + + private static readonly char[] EntrySeparators = { ',', ';', '\r', '\n' }; + + private readonly struct ParsedEntries + { + public ParsedEntries( + ImmutableDictionary exact, + ImmutableArray<(string Containing, string Prefix, string Spec)> wildcards) + { + Exact = exact; + Wildcards = wildcards; + } + + public ImmutableDictionary Exact { get; } + public ImmutableArray<(string Containing, string Prefix, string Spec)> Wildcards { get; } + public bool IsEmpty => Exact.Count == 0 && Wildcards.Length == 0; + } + } + + /// + /// Minimal -compatible format string parser. + /// Returns the maximum referenced placeholder index (-1 if none) and a flag indicating + /// whether the string is malformed. + /// + private static class FormatStringParser + { + public readonly struct Result + { + public Result(int maxIndex, bool isMalformed, int malformedAt) + { + MaxIndex = maxIndex; + IsMalformed = isMalformed; + MalformedAt = malformedAt; + } + + public int MaxIndex { get; } + public bool IsMalformed { get; } + public int MalformedAt { get; } + } + + public static Result Parse(string format) + { + var maxIndex = -1; + var i = 0; + while (i < format.Length) + { + var c = format[i]; + if (c == '{') + { + if (i + 1 < format.Length && format[i + 1] == '{') + { + i += 2; + continue; + } + + // Read placeholder: {N[,alignment][:format]} + i++; + if (i >= format.Length || !IsAsciiDigit(format[i])) + { + return new Result(maxIndex, true, i); + } + + var n = 0; + while (i < format.Length && IsAsciiDigit(format[i])) + { + n = n * 10 + (format[i] - '0'); + if (n > 1_000_000) + { + // Way too large; treat as malformed to avoid pathological inputs. + return new Result(maxIndex, true, i); + } + i++; + } + + if (n > maxIndex) + { + maxIndex = n; + } + + // Optional alignment ',-?digits' and optional format spec ':...'. Skip to '}'. + var sawClose = false; + while (i < format.Length) + { + var d = format[i]; + if (d == '}') + { + sawClose = true; + i++; + break; + } + + if (d == '{') + { + // Nested unescaped '{' inside placeholder: malformed. + return new Result(maxIndex, true, i); + } + + i++; + } + + if (!sawClose) + { + return new Result(maxIndex, true, format.Length); + } + + continue; + } + + if (c == '}') + { + if (i + 1 < format.Length && format[i + 1] == '}') + { + i += 2; + continue; + } + + return new Result(maxIndex, true, i); + } + + i++; + } + + return new Result(maxIndex, false, -1); + } + + private static bool IsAsciiDigit(char c) => c >= '0' && c <= '9'; + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzer.cs b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzer.cs new file mode 100644 index 0000000..d359751 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/PrivateMethodMultipleEnumerationAnalyzer.cs @@ -0,0 +1,471 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using System.Threading; +using ErrorProne.NET.Core; +using ErrorProne.NET.CoreAnalyzers; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.CSharp.Syntax; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.CoreAnalyzers +{ + /// + /// EPC40 — for private methods that enumerate an parameter + /// more than once, cross-references all call sites (capped at ) and reports + /// any caller that passes a deferred LINQ query, / + /// / + /// , or an iterator method. + /// + /// + /// Restricted to private methods because callers form a closed world within the compilation, + /// making the analysis decidable without dataflow. Provenance is followed back at most + /// hops through local declarations in the caller's body. + /// + /// + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class PrivateMethodMultipleEnumerationAnalyzer : DiagnosticAnalyzerBase + { + /// + public static readonly DiagnosticDescriptor Rule = DiagnosticDescriptors.EPC40; + + /// + public override ImmutableArray SupportedDiagnostics => ImmutableArray.Create(Rule); + + /// + public PrivateMethodMultipleEnumerationAnalyzer() + : base(Rule) + { + } + + /// Maximum number of call sites a candidate may have before we skip it entirely. + private const int MaxCallers = 3; + + /// Maximum number of provenance hops to follow back through local declarations. + private const int MaxProvenanceHops = 3; + + /// + protected override void InitializeCore(AnalysisContext context) + { + context.RegisterCompilationStartAction(OnCompilationStart); + } + + private static void OnCompilationStart(CompilationStartAnalysisContext context) + { + var state = new CompilationState(); + + // Pass 1 — identify candidate (method, parameterOrdinal) pairs: private methods whose + // IEnumerable parameter is enumerated >= 2 times in the body. + context.RegisterOperationBlockAction(blockContext => + { + if (blockContext.OwningSymbol is not IMethodSymbol method) + { + return; + } + + if (method.DeclaredAccessibility != Accessibility.Private) + { + return; + } + + // Skip extern / abstract / partial-no-body methods — they have nothing to inspect. + if (method.IsAbstract || method.IsExtern) + { + return; + } + + foreach (var parameter in method.Parameters) + { + if (!EnumerationMethods.IsDeferredEnumerableType(parameter.Type)) + { + continue; + } + + foreach (var block in blockContext.OperationBlocks) + { + if (CountEnumerationsOfParameter(block, parameter, blockContext.Compilation) >= 2) + { + state.AddCandidate(method.OriginalDefinition, parameter.Ordinal); + break; + } + } + } + }); + + // Pass 2 — record every invocation that targets a private method and passes an argument to a + // deferred-IEnumerable parameter. We over-collect cheaply here and filter at the end. + context.RegisterOperationAction(opContext => + { + var invocation = (IInvocationOperation)opContext.Operation; + var target = invocation.TargetMethod.OriginalDefinition; + + if (target.DeclaredAccessibility != Accessibility.Private) + { + return; + } + + foreach (var arg in invocation.Arguments) + { + if (arg.ArgumentKind == ArgumentKind.DefaultValue) + { + // Caller omitted this argument — there is no syntactic source to report on. + continue; + } + + var parameter = arg.Parameter; + if (parameter is null || !EnumerationMethods.IsDeferredEnumerableType(parameter.Type)) + { + continue; + } + + state.AddCallSite(target, parameter.Ordinal, arg); + } + }, OperationKind.Invocation); + + context.RegisterCompilationEndAction(endContext => + { + state.Evaluate(endContext, MaxCallers, MaxProvenanceHops); + }); + } + + /// + /// Counts how many syntactic enumeration sites of appear in + /// . Stops at 2 (the rule only cares whether the count is >= 2). + /// + private static int CountEnumerationsOfParameter(IOperation body, IParameterSymbol parameter, Compilation compilation) + { + var count = 0; + foreach (var op in body.EnumerateChildOperations()) + { + if (IsEnumerationSiteOf(op, parameter, compilation)) + { + if (++count >= 2) + { + return count; + } + } + } + + return count; + } + + private static bool IsEnumerationSiteOf(IOperation op, IParameterSymbol parameter, Compilation compilation) + { + switch (op) + { + case IForEachLoopOperation foreachOp: + { + var root = EnumerationMethods.TryGetRootEnumerableSymbol(foreachOp.Collection, compilation); + return SymbolEqualityComparer.Default.Equals(root, parameter); + } + + case IInvocationOperation invocation: + { + var target = invocation.TargetMethod; + IOperation? source = null; + + if (EnumerationMethods.IsEnumeratingLinqMethod(target, compilation)) + { + source = invocation.Instance ?? + (invocation.Arguments.Length > 0 ? invocation.Arguments[0].Value : null); + } + else if (EnumerationMethods.IsTaskAggregationMethod(target, compilation)) + { + source = invocation.Arguments.Length > 0 ? invocation.Arguments[0].Value : null; + } + + if (source is null) + { + return false; + } + + var root = EnumerationMethods.TryGetRootEnumerableSymbol(source, compilation); + return SymbolEqualityComparer.Default.Equals(root, parameter); + } + + default: + return false; + } + } + + /// + /// Shared, concurrency-safe state collected across the compilation. + /// + private sealed class CompilationState + { + // (methodOriginalDefinition, parameterOrdinal) — set of candidate "problematic" parameters. + private readonly ConcurrentDictionary _candidates = + new ConcurrentDictionary(CandidateKey.Comparer); + + // (methodOriginalDefinition, parameterOrdinal) — list of every recorded call-site argument. + private readonly ConcurrentDictionary> _callSites = + new ConcurrentDictionary>(CandidateKey.Comparer); + + // Per-method "is this an iterator method (has `yield` in its body)?" cache. + private readonly ConcurrentDictionary _iteratorCache = + new ConcurrentDictionary(SymbolEqualityComparer.Default); + + public void AddCandidate(IMethodSymbol method, int parameterOrdinal) + { + _candidates.TryAdd(new CandidateKey(method, parameterOrdinal), 0); + } + + public void AddCallSite(IMethodSymbol method, int parameterOrdinal, IArgumentOperation argument) + { + var bag = _callSites.GetOrAdd(new CandidateKey(method, parameterOrdinal), + _ => new ConcurrentBag()); + bag.Add(argument); + } + + public void Evaluate(CompilationAnalysisContext context, int maxCallers, int maxProvenanceHops) + { + foreach (var entry in _candidates) + { + if (!_callSites.TryGetValue(entry.Key, out var bag)) + { + continue; + } + + var args = bag.ToArray(); + if (args.Length == 0 || args.Length > maxCallers) + { + continue; + } + + var method = entry.Key.Method; + var parameter = method.Parameters[entry.Key.ParameterOrdinal]; + + foreach (var arg in args) + { + var producerDescription = ClassifyProducer(arg, maxProvenanceHops, context.Compilation, _iteratorCache, context.CancellationToken); + if (producerDescription is null) + { + continue; + } + + context.ReportDiagnostic(Diagnostic.Create( + Rule, + arg.Value.Syntax.GetLocation(), + method.Name, + parameter.Name, + producerDescription)); + } + } + } + } + + /// + /// Walks back from through up to local + /// declarations and returns a short human-readable description of the producing expression if it + /// is a deferred query (LINQ chain, Enumerable.Range/Repeat/Empty, or an iterator method). + /// Returns when the producer cannot be classified as a deferred query. + /// + private static string? ClassifyProducer( + IArgumentOperation arg, + int maxHops, + Compilation compilation, + ConcurrentDictionary iteratorCache, + CancellationToken cancellationToken) + { + var enclosingBody = GetEnclosingBody(arg); + IOperation current = arg.Value; + + for (var hop = 0; hop <= maxHops; hop++) + { + cancellationToken.ThrowIfCancellationRequested(); + current = Unwrap(current); + + switch (current) + { + case IInvocationOperation invocation: + { + var target = invocation.TargetMethod; + if (EnumerationMethods.IsLinqChainMethod(target, compilation)) + { + return $".{target.Name}(...)"; + } + + if (EnumerationMethods.IsEnumerableProducerMethod(target, compilation)) + { + return $"Enumerable.{target.Name}(...)"; + } + + if (IsIteratorMethod(target, iteratorCache, cancellationToken)) + { + return $"{target.Name}() (iterator)"; + } + + return null; + } + + case ILocalReferenceOperation localRef when enclosingBody is not null: + { + var init = FindLocalInitializer(localRef.Local, enclosingBody); + if (init is null) + { + return null; + } + + current = init; + continue; + } + + default: + return null; + } + } + + return null; + } + + private static IOperation Unwrap(IOperation op) + { + while (true) + { + switch (op) + { + case IConversionOperation conv: + op = conv.Operand; + continue; + case IParenthesizedOperation paren: + op = paren.Operand; + continue; + default: + return op; + } + } + } + + private static IOperation? GetEnclosingBody(IOperation op) + { + for (var cur = op.Parent; cur is not null; cur = cur.Parent) + { + if (cur is IMethodBodyOperation) + { + return cur; + } + + if (cur is IBlockOperation block && block.Parent is null) + { + return block; + } + + if (cur is IAnonymousFunctionOperation func) + { + return func.Body; + } + + if (cur is ILocalFunctionOperation localFunc) + { + return localFunc.Body; + } + } + + return null; + } + + private static IOperation? FindLocalInitializer(ILocalSymbol local, IOperation body) + { + foreach (var op in body.EnumerateChildOperations()) + { + if (op is IVariableDeclaratorOperation declarator + && SymbolEqualityComparer.Default.Equals(declarator.Symbol, local)) + { + return declarator.Initializer?.Value + ?? (declarator.Parent as IVariableDeclarationOperation)?.Initializer?.Value; + } + } + + return null; + } + + private static bool IsIteratorMethod( + IMethodSymbol method, + ConcurrentDictionary cache, + CancellationToken cancellationToken) + { + return cache.GetOrAdd(method.OriginalDefinition, m => ComputeIsIteratorMethod((IMethodSymbol)m, cancellationToken)); + } + + private static bool ComputeIsIteratorMethod(IMethodSymbol method, CancellationToken cancellationToken) + { + foreach (var syntaxRef in method.DeclaringSyntaxReferences) + { + cancellationToken.ThrowIfCancellationRequested(); + var syntax = syntaxRef.GetSyntax(cancellationToken); + var body = ExtractBody(syntax); + if (body is null) + { + continue; + } + + foreach (var descendant in body.DescendantNodes(static node => + node is not LocalFunctionStatementSyntax && + node is not LambdaExpressionSyntax && + node is not AnonymousMethodExpressionSyntax)) + { + if (descendant is YieldStatementSyntax) + { + return true; + } + } + } + + return false; + } + + private static SyntaxNode? ExtractBody(SyntaxNode syntax) + { + return syntax switch + { + MethodDeclarationSyntax md => (SyntaxNode?)md.Body ?? md.ExpressionBody, + LocalFunctionStatementSyntax lfs => (SyntaxNode?)lfs.Body ?? lfs.ExpressionBody, + AccessorDeclarationSyntax acc => (SyntaxNode?)acc.Body ?? acc.ExpressionBody, + _ => null, + }; + } + + /// + /// Composite key for (method original-definition, parameter ordinal). Equality is based on + /// so the same method symbol coming from different + /// declaration sites compares equal. + /// + private readonly struct CandidateKey : IEquatable + { + public static readonly IEqualityComparer Comparer = new CandidateKeyComparer(); + + public IMethodSymbol Method { get; } + public int ParameterOrdinal { get; } + + public CandidateKey(IMethodSymbol method, int parameterOrdinal) + { + Method = method; + ParameterOrdinal = parameterOrdinal; + } + + public bool Equals(CandidateKey other) => + SymbolEqualityComparer.Default.Equals(Method, other.Method) && + ParameterOrdinal == other.ParameterOrdinal; + + public override bool Equals(object? obj) => obj is CandidateKey k && Equals(k); + + public override int GetHashCode() => + unchecked(SymbolEqualityComparer.Default.GetHashCode(Method) * 397 ^ ParameterOrdinal); + + private sealed class CandidateKeyComparer : IEqualityComparer + { + public bool Equals(CandidateKey x, CandidateKey y) => x.Equals(y); + public int GetHashCode(CandidateKey obj) => obj.GetHashCode(); + } + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs new file mode 100644 index 0000000..2d48f22 --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/CoreAnalyzers/QuadraticEnumerationAnalyzer.cs @@ -0,0 +1,230 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System.Collections.Generic; +using System.Collections.Immutable; +using ErrorProne.NET.Core; +using ErrorProne.NET.CoreAnalyzers; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Diagnostics; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.CoreAnalyzers +{ + /// + /// EPC39 — detects three patterns where the *same* enumerable is iterated more than once inside a loop, + /// producing O(N²) (or worse) behavior: + /// + /// Q1 — same-symbol nested enumeration: a LINQ-enumerating call whose source is the + /// same local/parameter that is also the foreach source. + /// Q2 — re-enumeration of a deferred + /// inside a loop (the receiver's static type is IEnumerable<T>). + /// Q4 — nested foreach over the same source. + /// + /// + /// Generic "O(N) method called inside a loop" patterns (e.g. List<T>.Contains / + /// Array.IndexOf on a different collection than the loop source) are intentionally NOT flagged: + /// they are often correct on small inputs and produced too much noise in real codebases. + /// + /// + [DiagnosticAnalyzer(LanguageNames.CSharp)] + public sealed class QuadraticEnumerationAnalyzer : DiagnosticAnalyzerBase + { + /// + public static readonly DiagnosticDescriptor Rule = DiagnosticDescriptors.EPC39; + + /// + public override ImmutableArray SupportedDiagnostics => ImmutableArray.Create(Rule); + + /// + public QuadraticEnumerationAnalyzer() + : base(Rule) + { + } + + /// + protected override void InitializeCore(AnalysisContext context) + { + context.RegisterOperationAction(AnalyzeLoop, OperationKind.Loop); + } + + private static void AnalyzeLoop(OperationAnalysisContext context) + { + var loop = (ILoopOperation)context.Operation; + + // Resolve the loop's source symbol if it's a foreach over a local/parameter (drives Q1 and Q4). + ISymbol? loopSource = null; + if (loop is IForEachLoopOperation foreachLoop) + { + loopSource = EnumerationMethods.TryGetRootEnumerableSymbol(foreachLoop.Collection, context.Compilation); + } + + var reported = new HashSet(); + + foreach (var op in loop.Body.EnumerateChildOperations()) + { + if (IsInsideNestedFunction(op, loop.Body)) + { + continue; + } + + // Skip any operation that lives inside an inner loop — the inner loop will analyze + // its own body on its own context, and reporting at both levels causes noise. + if (IsInsideInnerLoop(op, loop)) + { + continue; + } + + switch (op) + { + case IForEachLoopOperation inner when loopSource is not null: + { + // Q4 — nested foreach over the same source as the outer foreach. + var innerSource = EnumerationMethods.TryGetRootEnumerableSymbol(inner.Collection, context.Compilation); + if (innerSource is not null && SymbolEqualityComparer.Default.Equals(innerSource, loopSource)) + { + Report(context, reported, inner.Collection.Syntax, + $"nested foreach iterates over the same source '{loopSource.Name}' twice (O(N²))."); + } + + break; + } + + case IInvocationOperation invocation: + AnalyzeInvocation(context, loop, loopSource, invocation, reported); + break; + } + } + } + + private static void AnalyzeInvocation( + OperationAnalysisContext context, + ILoopOperation loop, + ISymbol? loopSource, + IInvocationOperation invocation, + HashSet reported) + { + var compilation = context.Compilation; + var target = invocation.TargetMethod; + + // ------- Q1 & Q2: enumerating LINQ method called on some source ---------------------------- + if (EnumerationMethods.IsEnumeratingLinqMethod(target, compilation)) + { + var sourceOp = invocation.Instance + ?? (invocation.Arguments.Length > 0 ? invocation.Arguments[0].Value : null); + if (sourceOp is null) + { + return; + } + + var rootSymbol = EnumerationMethods.TryGetRootEnumerableSymbol(sourceOp, compilation); + if (rootSymbol is null) + { + return; + } + + // Q1: same symbol as the outer foreach's collection. + if (loopSource is not null && SymbolEqualityComparer.Default.Equals(rootSymbol, loopSource)) + { + Report(context, reported, invocation.Syntax, + $"LINQ method '{target.Name}' is called on the same enumerable '{rootSymbol.Name}' that is being iterated (O(N²))."); + return; + } + + // Q2: source is a deferred IEnumerable local/parameter. + var symbolType = GetSymbolType(rootSymbol); + if (EnumerationMethods.IsDeferredEnumerableType(symbolType)) + { + // If the source local is declared inside the loop body, a fresh enumerable is bound + // each iteration — the cost does NOT compound with the outer loop size. Multiple + // enumerations within a single iteration are a constant-factor multiple-enumeration + // concern (see CA1851), not the quadratic-in-the-loop pattern EPC39 targets. Skip. + if (IsLocalDeclaredInsideLoopBody(rootSymbol, loop)) + { + return; + } + + Report(context, reported, invocation.Syntax, + $"LINQ method '{target.Name}' re-enumerates deferred IEnumerable '{rootSymbol.Name}' on every iteration; materialize it with .ToList()/.ToArray() before the loop."); + return; + } + } + } + + private static bool IsLocalDeclaredInsideLoopBody(ISymbol symbol, ILoopOperation loop) + { + if (symbol is not ILocalSymbol local) + { + return false; + } + + var bodySyntax = loop.Body.Syntax; + var bodyTree = bodySyntax.SyntaxTree; + var bodySpan = bodySyntax.Span; + + foreach (var reference in local.DeclaringSyntaxReferences) + { + if (reference.SyntaxTree == bodyTree && bodySpan.Contains(reference.Span)) + { + return true; + } + } + + return false; + } + + private static void Report( + OperationAnalysisContext context, + HashSet reported, + SyntaxNode syntax, + string message) + { + if (!reported.Add(syntax)) + { + return; + } + + context.ReportDiagnostic(Diagnostic.Create(Rule, syntax.GetLocation(), message)); + } + + private static bool IsInsideNestedFunction(IOperation op, IOperation root) + { + for (var current = op.Parent; current is not null && current != root; current = current.Parent) + { + if (current is IAnonymousFunctionOperation || current is ILocalFunctionOperation) + { + return true; + } + } + + return false; + } + + private static bool IsInsideInnerLoop(IOperation op, ILoopOperation outerLoop) + { + // Walk up from `op` looking for a loop operation that is strictly inside `outerLoop`. + for (var current = op.Parent; current is not null && current != outerLoop; current = current.Parent) + { + if (current is ILoopOperation && current != outerLoop) + { + return true; + } + } + + return false; + } + + private static ITypeSymbol? GetSymbolType(ISymbol symbol) + { + return symbol switch + { + ILocalSymbol l => l.Type, + IParameterSymbol p => p.Type, + _ => null, + }; + } + } +} diff --git a/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs b/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs index 7ff0c4d..5e95de7 100644 --- a/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs +++ b/src/ErrorProne.NET.CoreAnalyzers/DiagnosticDescriptors.cs @@ -312,6 +312,50 @@ internal static class DiagnosticDescriptors description: "Argument validation in async methods does not throw exceptions eagerly. The exception is thrown when the task is awaited, which can lead to unexpected behavior.", helpLinkUri: GetHelpUri(nameof(EPC37))); + /// + public static readonly DiagnosticDescriptor EPC38 = new DiagnosticDescriptor( + nameof(EPC38), + title: "Do not re-enumerate IEnumerable", + messageFormat: "Possible re-enumeration of IEnumerable '{0}'. Each enumeration starts a new set of tasks, which is almost certainly a bug.", + category: AsyncCategory, + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: false, + description: "Enumerating an IEnumerable more than once (for example, via Task.WhenAll followed by a foreach over the same enumerable) re-executes the underlying task producer and creates a new, unrelated set of tasks. Materialize with .ToArray() or .ToList() before observing.", + helpLinkUri: GetHelpUri(nameof(EPC38))); + + /// + public static readonly DiagnosticDescriptor EPC39 = new DiagnosticDescriptor( + nameof(EPC39), + title: "Same enumerable iterated more than once inside a loop", + messageFormat: "Quadratic enumeration: {0}", + category: PerformanceCategory, + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: false, + description: "Iterating the same enumerable more than once inside a loop turns an O(N) walk into an O(N*M) one. Materialize the source with .ToList()/.ToArray() before the loop, or restructure to enumerate once.", + helpLinkUri: GetHelpUri(nameof(EPC39))); + + /// + public static readonly DiagnosticDescriptor EPC40 = new DiagnosticDescriptor( + nameof(EPC40), + title: "Multiple enumeration of deferred query passed to private method", + messageFormat: "Private method '{0}' enumerates parameter '{1}' multiple times; this argument is a deferred query ({2}) that will be re-executed on each enumeration", + category: PerformanceCategory, + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: false, + description: "When a private method enumerates its IEnumerable parameter multiple times and a caller passes a deferred LINQ query, every enumeration re-runs the entire query. The analyzer cross-references all (up to 3) private call sites and reports only when a caller passes a deferred query. Materialize the source (.ToList()/.ToArray()) before passing, or change the parameter type to IReadOnlyList/T[].", + helpLinkUri: GetHelpUri(nameof(EPC40))); + + /// + public static readonly DiagnosticDescriptor EPC41 = new DiagnosticDescriptor( + nameof(EPC41), + title: "Format string does not match arguments", + messageFormat: "{0}", + category: ErrorHandlingCategory, + defaultSeverity: DiagnosticSeverity.Warning, + isEnabledByDefault: true, + description: "Format-string placeholders (e.g. '{0}', '{1}') do not match the arguments supplied to a user-annotated formatting method; the call would throw FormatException at runtime. Annotate formatting methods via 'dotnet_diagnostic.EPC41.format_methods' in .editorconfig (e.g. MyCorp.Logger.Log:0).", + helpLinkUri: GetHelpUri(nameof(EPC41))); + public static string GetHelpUri(string ruleId) { return $"https://github.com/SergeyTeplyakov/ErrorProne.NET/tree/master/docs/Rules/{ruleId}.md"; diff --git a/src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs b/src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs new file mode 100644 index 0000000..3602e0d --- /dev/null +++ b/src/ErrorProne.NET.CoreAnalyzers/EnumerationMethods.cs @@ -0,0 +1,376 @@ +// -------------------------------------------------------------------- +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// +// -------------------------------------------------------------------- + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using ErrorProne.NET.Core; +using Microsoft.CodeAnalysis; +using Microsoft.CodeAnalysis.Operations; + +namespace ErrorProne.NET.CoreAnalyzers +{ + /// + /// Shared helpers for analyzers that reason about how an + /// is consumed (EPC38, EPC39). + /// + internal static class EnumerationMethods + { + /// + /// Names of methods that fully materialize / iterate the source + /// (i.e. they call GetEnumerator() at least once when invoked). + /// + private static readonly ImmutableHashSet EnumeratingLinqMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "Aggregate", + "All", + "Any", + "Average", + "Contains", + "Count", + "ElementAt", + "ElementAtOrDefault", + "First", + "FirstOrDefault", + "Last", + "LastOrDefault", + "LongCount", + "Max", + "MaxBy", + "Min", + "MinBy", + "SequenceEqual", + "Single", + "SingleOrDefault", + "Sum", + "ToArray", + "ToDictionary", + "ToFrozenDictionary", + "ToFrozenSet", + "ToHashSet", + "ToList", + "ToLookup"); + + /// + /// Names of methods that produce a *new*, materialized collection + /// from the source. + /// + private static readonly ImmutableHashSet MaterializingLinqMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "ToArray", + "ToDictionary", + "ToFrozenDictionary", + "ToFrozenSet", + "ToHashSet", + "ToList", + "ToLookup"); + + /// + /// Names of methods that return a deferred enumerable + /// (LINQ chain methods). They do not enumerate their source. + /// + private static readonly ImmutableHashSet ChainLinqMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "Append", + "AsEnumerable", + "Cast", + "Concat", + "DefaultIfEmpty", + "Distinct", + "DistinctBy", + "Except", + "ExceptBy", + "GroupBy", + "GroupJoin", + "Intersect", + "IntersectBy", + "Join", + "OfType", + "OrderBy", + "OrderByDescending", + "Prepend", + "Reverse", + "Select", + "SelectMany", + "Skip", + "SkipLast", + "SkipWhile", + "Take", + "TakeLast", + "TakeWhile", + "ThenBy", + "ThenByDescending", + "Union", + "UnionBy", + "Where", + "Zip"); + + /// + /// Names of methods on that take an + /// of tasks and observe every element. + /// + private static readonly ImmutableHashSet TaskAggregationMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "WhenAll", + "WhenAny", + "WaitAll", + "WaitAny"); + + /// + /// Names of static methods that *produce* a new deferred + /// from non-enumerable inputs (so every consumer pass starts a fresh + /// iteration). + /// + private static readonly ImmutableHashSet EnumerableProducerMethods = ImmutableHashSet.Create( + StringComparer.Ordinal, + "Empty", + "Range", + "Repeat"); + + /// + /// Returns if invoking consumes its source enumerable + /// at least once (e.g. Count(), ToList(), Any(), …). + /// + public static bool IsEnumeratingLinqMethod(IMethodSymbol method, Compilation compilation) + { + if (method.MethodKind != MethodKind.Ordinary && method.MethodKind != MethodKind.ReducedExtension) + { + return false; + } + + if (!EnumeratingLinqMethods.Contains(method.Name)) + { + return false; + } + + var containing = (method.ReducedFrom ?? method).ContainingType; + return containing.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if is a LINQ chain method + /// (Where, Select, OrderBy, …) that produces a new deferred enumerable. + /// + public static bool IsLinqChainMethod(IMethodSymbol method, Compilation compilation) + { + if (!ChainLinqMethods.Contains(method.Name)) + { + return false; + } + + var containing = (method.ReducedFrom ?? method).ContainingType; + return containing.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if invoking produces a freshly + /// materialized collection from the source (the source is consumed but the *result* is no longer deferred). + /// + public static bool IsMaterializingLinqMethod(IMethodSymbol method, Compilation compilation) + { + if (!MaterializingLinqMethods.Contains(method.Name)) + { + return false; + } + + var containing = (method.ReducedFrom ?? method).ContainingType; + return containing.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if is Task.WhenAll, + /// Task.WhenAny, Task.WaitAll, or Task.WaitAny. + /// + public static bool IsTaskAggregationMethod(IMethodSymbol method, Compilation compilation) + { + if (!method.IsStatic || !TaskAggregationMethods.Contains(method.Name)) + { + return false; + } + + return method.ContainingType.IsClrType(compilation, typeof(System.Threading.Tasks.Task)); + } + + /// + /// Returns if is one of + /// Enumerable.Range, Enumerable.Repeat, or Enumerable.Empty — i.e. + /// a static factory that produces a fresh deferred enumerable each time it is invoked. + /// + public static bool IsEnumerableProducerMethod(IMethodSymbol method, Compilation compilation) + { + if (!method.IsStatic || !EnumerableProducerMethods.Contains(method.Name)) + { + return false; + } + + return method.ContainingType.IsClrType(compilation, typeof(System.Linq.Enumerable)); + } + + /// + /// Returns if is the deferred + /// or non-generic interface itself. Concrete materialized + /// collections (T[], List<T>, IReadOnlyList<T>, HashSet<T>, …) + /// return . + /// + public static bool IsDeferredEnumerableType(ITypeSymbol? type) + { + if (type is null) + { + return false; + } + + // SpecialType lives on the original (unconstructed) definition. For a constructed type like + // IEnumerable>, type.SpecialType is None — we must inspect OriginalDefinition. + switch (type.OriginalDefinition.SpecialType) + { + case SpecialType.System_Collections_Generic_IEnumerable_T: + case SpecialType.System_Collections_IEnumerable: + return true; + } + + // ParallelQuery, OrderedEnumerable, IGrouping, IOrderedEnumerable, etc. are + // intentionally NOT included to keep the rule precise. The most common deferred shape from a LINQ + // chain has the static type IEnumerable, which is handled above. + return false; + } + + /// + /// Returns if is + /// of , Task<T>, ValueTask, or + /// ValueTask<T>. + /// + public static bool IsTaskEnumerableType(ITypeSymbol? type, Compilation compilation) + { + if (type is not INamedTypeSymbol named) + { + return false; + } + + if (!IsDeferredEnumerableType(named)) + { + return false; + } + + if (named.TypeArguments.Length != 1) + { + return false; + } + + return named.TypeArguments[0].IsTaskLike(compilation); + } + + /// + /// Returns if is some materialized + /// T[]/List<T>/IReadOnlyList<T>/IList<T>/ + /// ICollection<T>/IReadOnlyCollection<T>/ImmutableArray<T>/ + /// HashSet<T> — i.e. cases where re-enumeration is safe. + /// + public static bool IsMaterializedCollectionType(ITypeSymbol? type) + { + if (type is null) + { + return false; + } + + if (type.TypeKind == TypeKind.Array) + { + return true; + } + + // Match by metadata name to avoid one-by-one compilation lookups (and to handle missing references). + return type.OriginalDefinition.SpecialType switch + { + SpecialType.System_Collections_Generic_ICollection_T => true, + SpecialType.System_Collections_Generic_IList_T => true, + SpecialType.System_Collections_Generic_IReadOnlyCollection_T => true, + SpecialType.System_Collections_Generic_IReadOnlyList_T => true, + _ => IsMaterializedByMetadataName(type), + }; + } + + private static bool IsMaterializedByMetadataName(ITypeSymbol type) + { + var name = type.OriginalDefinition.ToDisplayString(); + return name switch + { + "System.Collections.Generic.List" => true, + "System.Collections.Generic.HashSet" => true, + "System.Collections.Generic.Dictionary" => true, + "System.Collections.Generic.SortedSet" => true, + "System.Collections.Generic.SortedList" => true, + "System.Collections.Generic.SortedDictionary" => true, + "System.Collections.Generic.Queue" => true, + "System.Collections.Generic.Stack" => true, + "System.Collections.Immutable.ImmutableArray" => true, + "System.Collections.Immutable.ImmutableList" => true, + "System.Collections.Immutable.ImmutableHashSet" => true, + "System.Collections.Immutable.ImmutableSortedSet" => true, + "System.Collections.Immutable.ImmutableDictionary" => true, + "System.Collections.Frozen.FrozenSet" => true, + "System.Collections.Frozen.FrozenDictionary" => true, + "System.Collections.Concurrent.ConcurrentBag" => true, + _ => false, + }; + } + + /// + /// Walks back through LINQ chain calls (x.Where(...).Select(...).OrderBy(...)) and casts/conversions + /// to find the underlying local/parameter that the chain ultimately reads. + /// + /// Returns the root if it is a or + /// , otherwise . + /// + public static ISymbol? TryGetRootEnumerableSymbol(IOperation? operation, Compilation compilation) + { + while (operation is not null) + { + switch (operation) + { + case IConversionOperation conv: + operation = conv.Operand; + continue; + + case IParenthesizedOperation paren: + operation = paren.Operand; + continue; + + case ILocalReferenceOperation local: + return local.Local; + + case IParameterReferenceOperation parameter: + return parameter.Parameter; + + case IInvocationOperation invocation: + { + // LINQ chain: keep walking back through `.Where(...).Select(...)` to the chain root. + if (IsLinqChainMethod(invocation.TargetMethod, compilation)) + { + // For an extension method, the source is the first argument; for the (rare) + // explicit Enumerable.Xxx(source, ...) form it is the same. + if (invocation.Arguments.Length > 0) + { + operation = invocation.Arguments[0].Value; + continue; + } + + operation = invocation.Instance; + continue; + } + + // Any other invocation is a wall; we cannot reason about it. + return null; + } + + default: + return null; + } + } + + return null; + } + } +}