From 648f4bc31ab93080f58745dba996aba9011e0bfc Mon Sep 17 00:00:00 2001 From: Steve Hansen Date: Sat, 30 May 2026 14:08:16 +0200 Subject: [PATCH 1/2] fix: quote fields containing a carriage return in CsvWriter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per RFC 4180 a field that contains CR, LF, the separator, or a quote must be quoted. CsvWriter only triggered on '\n', the separator, the single quote, and '"', so a value like `a\rb` was written unquoted — mis-parsed by strict readers and split into two records when re-read. CsvBufferWriter already included '\r'; all CsvWriter paths (sync, async, and the ReadOnlyMemory paths) now match it. Also removes the per-row char[] allocation in WriteLine/WriteLineAsync by caching the fixed quote-trigger characters in a static array and checking the variable separator separately. Surfaced by Gemini's review of #127. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 8 ++++++++ Csv.Tests/WriterTests.cs | 36 ++++++++++++++++++++++++++++++++++++ Csv/CsvWriter.cs | 13 ++++++++----- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9314801..fe5d48b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- `CsvWriter` not quoting fields that contain a bare carriage return (`\r`) + - Per RFC 4180 a field containing CR, LF, the separator, or a quote must be quoted; `CsvWriter` only triggered on `\n`, the separator, `'`, and `"`, so a value like `a\rb` was written unquoted — mis-parsed by strict readers and split into two records when re-read + - `CsvBufferWriter` already included `\r`; all `CsvWriter` paths (sync, async, and the `ReadOnlyMemory` paths) now match it + +### Performance +- Removed a per-row `char[]` allocation in `CsvWriter.WriteLine`/`WriteLineAsync` by caching the fixed quote-trigger characters in a static array and checking the variable separator separately + ## [2.0.245] - 2026-05-17 ### Fixed diff --git a/Csv.Tests/WriterTests.cs b/Csv.Tests/WriterTests.cs index 30dabfc..7e131f7 100644 --- a/Csv.Tests/WriterTests.cs +++ b/Csv.Tests/WriterTests.cs @@ -45,6 +45,42 @@ public void HeaderAndRowsEscapedValues() $"\"A,\",\"\"\"B\",\"C\"\"\",\"D'\"{Environment.NewLine}X,Y,Z,{Environment.NewLine}X,Y,Z,{Environment.NewLine}"); } + [TestMethod] + public void EscapesFieldContainingCarriageReturn() + { + // RFC 4180: a lone CR must force quoting just like LF. Without it, "a\rb" would be + // written unquoted and mis-split by strict parsers. CsvBufferWriter already quoted CR; + // CsvWriter now matches it. + var result = CsvWriter.WriteToText(["A"], [["a\rb"]]); + Assert.AreEqual($"A{Environment.NewLine}\"a\rb\"{Environment.NewLine}", result); + } + + [TestMethod] + public async Task EscapesFieldContainingCarriageReturn_Async() + { + var writer = new StringWriter(); + await CsvWriter.WriteAsync(writer, ["A"], [["a\rb"]]); + Assert.AreEqual($"A{Environment.NewLine}\"a\rb\"{Environment.NewLine}", writer.ToString()); + } + +#if NET8_0_OR_GREATER + [TestMethod] + public void EscapesFieldContainingCarriageReturn_MemoryPath() + { + var headers = new[] { "A".AsMemory() }; + var rows = new[] { new[] { "a\rb".AsMemory() } }; + Assert.AreEqual($"A{Environment.NewLine}\"a\rb\"{Environment.NewLine}", CsvWriter.WriteToText(headers, rows)); + } + + [TestMethod] + public void BufferWriterAndCsvWriterAgreeOnCarriageReturn() + { + using var buffer = new CsvBufferWriter(); + buffer.WriteRow(new[] { "a\rb".AsMemory() }); + StringAssert.Contains(buffer.ToString(), "\"a\rb\""); + } +#endif + //[TestMethod] //public void RowsNewLineEscapedValues() //{ diff --git a/Csv/CsvWriter.cs b/Csv/CsvWriter.cs index a749510..34c8975 100644 --- a/Csv/CsvWriter.cs +++ b/Csv/CsvWriter.cs @@ -26,9 +26,14 @@ public static class CsvWriter // Keep the fixed escape chars cached and check the separator with a separate Contains. // Without this caching, MemoryExtensions.IndexOfAny(ReadOnlySpan, ReadOnlySpan)/char[] // builds a fresh SearchValues on the heap every call. - private static readonly SearchValues FixedEscapeChars = SearchValues.Create("'\n"); + private static readonly SearchValues FixedEscapeChars = SearchValues.Create("'\n\r"); #endif + // RFC 4180: a field must be quoted when it contains a quote, the separator, CR, or LF. + // The separator varies per call, so only the fixed triggers are cached here as a shared + // array (scanned alongside a separate separator check) to avoid a per-row char[] allocation. + private static readonly char[] FixedEscapeCharsArray = { '\'', '\n', '\r' }; + /// /// Writes the lines to the writer without headers. Column count is determined from the first data line. /// @@ -468,7 +473,6 @@ public static async Task WriteToTextAsync(ReadOnlyMemory[]? header private static void WriteLine(TextWriter writer, string[] data, int columnCount, char separator) { - var escapeChars = new[] { separator, '\'', '\n' }; for (var i = 0; i < columnCount; i++) { if (i > 0) @@ -487,7 +491,7 @@ private static void WriteLine(TextWriter writer, string[] data, int columnCount, escape = true; cell = cell.Replace("\"", "\"\""); } - else if (cell.IndexOfAny(escapeChars) >= 0) + else if (cell.IndexOf(separator) >= 0 || cell.IndexOfAny(FixedEscapeCharsArray) >= 0) escape = true; if (escape) @@ -503,7 +507,6 @@ private static void WriteLine(TextWriter writer, string[] data, int columnCount, private static async Task WriteLineAsync(TextWriter writer, string[] data, int columnCount, char separator) { - var escapeChars = new[] { separator, '\'', '\n' }; for (var i = 0; i < columnCount; i++) { if (i > 0) @@ -555,7 +558,7 @@ private static async Task WriteLineAsync(TextWriter writer, string[] data, int c // Write closing quote await writer.WriteAsync('"').ConfigureAwait(false); } - else if (cell.IndexOfAny(escapeChars) >= 0) + else if (cell.IndexOf(separator) >= 0 || cell.IndexOfAny(FixedEscapeCharsArray) >= 0) { await writer.WriteAsync('"').ConfigureAwait(false); await writer.WriteAsync(cell).ConfigureAwait(false); From 2e189327ca0175b500ed5ba6a5cf3e39a4ad8a2d Mon Sep 17 00:00:00 2001 From: Steve Hansen Date: Sat, 30 May 2026 15:57:54 +0200 Subject: [PATCH 2/2] perf: use cached SearchValues on the NET8+ writer hot path Address PR review: on NET8+, WriteLine/WriteLineAsync now reuse the cached SearchValues via cell.AsSpan().IndexOfAny + string.Contains(separator) instead of char[] IndexOfAny, matching the existing memory write paths. The char[] fallback is retained for netstandard2.0 and scoped under #if so it is not flagged unused on NET8+. Co-Authored-By: Claude Opus 4.8 (1M context) --- Csv/CsvWriter.cs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Csv/CsvWriter.cs b/Csv/CsvWriter.cs index 34c8975..853a487 100644 --- a/Csv/CsvWriter.cs +++ b/Csv/CsvWriter.cs @@ -27,12 +27,13 @@ public static class CsvWriter // Without this caching, MemoryExtensions.IndexOfAny(ReadOnlySpan, ReadOnlySpan)/char[] // builds a fresh SearchValues on the heap every call. private static readonly SearchValues FixedEscapeChars = SearchValues.Create("'\n\r"); -#endif - +#else // RFC 4180: a field must be quoted when it contains a quote, the separator, CR, or LF. - // The separator varies per call, so only the fixed triggers are cached here as a shared - // array (scanned alongside a separate separator check) to avoid a per-row char[] allocation. + // Kept as a shared array (scanned alongside a separate separator check) so WriteLine + // doesn't allocate a per-row char[] on the netstandard2.0 path. NET8+ uses the + // vectorized SearchValues above instead. private static readonly char[] FixedEscapeCharsArray = { '\'', '\n', '\r' }; +#endif /// /// Writes the lines to the writer without headers. Column count is determined from the first data line. @@ -491,7 +492,11 @@ private static void WriteLine(TextWriter writer, string[] data, int columnCount, escape = true; cell = cell.Replace("\"", "\"\""); } +#if NET8_0_OR_GREATER + else if (cell.Contains(separator) || cell.AsSpan().IndexOfAny(FixedEscapeChars) >= 0) +#else else if (cell.IndexOf(separator) >= 0 || cell.IndexOfAny(FixedEscapeCharsArray) >= 0) +#endif escape = true; if (escape) @@ -558,7 +563,11 @@ private static async Task WriteLineAsync(TextWriter writer, string[] data, int c // Write closing quote await writer.WriteAsync('"').ConfigureAwait(false); } +#if NET8_0_OR_GREATER + else if (cell.Contains(separator) || cell.AsSpan().IndexOfAny(FixedEscapeChars) >= 0) +#else else if (cell.IndexOf(separator) >= 0 || cell.IndexOfAny(FixedEscapeCharsArray) >= 0) +#endif { await writer.WriteAsync('"').ConfigureAwait(false); await writer.WriteAsync(cell).ConfigureAwait(false);