diff --git a/CHANGELOG.md b/CHANGELOG.md index 9314801..fe5d48b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- `CsvWriter` not quoting fields that contain a bare carriage return (`\r`) + - Per RFC 4180 a field containing CR, LF, the separator, or a quote must be quoted; `CsvWriter` only triggered on `\n`, the separator, `'`, and `"`, so a value like `a\rb` was written unquoted — mis-parsed by strict readers and split into two records when re-read + - `CsvBufferWriter` already included `\r`; all `CsvWriter` paths (sync, async, and the `ReadOnlyMemory` paths) now match it + +### Performance +- Removed a per-row `char[]` allocation in `CsvWriter.WriteLine`/`WriteLineAsync` by caching the fixed quote-trigger characters in a static array and checking the variable separator separately + ## [2.0.245] - 2026-05-17 ### Fixed diff --git a/Csv.Tests/WriterTests.cs b/Csv.Tests/WriterTests.cs index 30dabfc..7e131f7 100644 --- a/Csv.Tests/WriterTests.cs +++ b/Csv.Tests/WriterTests.cs @@ -45,6 +45,42 @@ public void HeaderAndRowsEscapedValues() $"\"A,\",\"\"\"B\",\"C\"\"\",\"D'\"{Environment.NewLine}X,Y,Z,{Environment.NewLine}X,Y,Z,{Environment.NewLine}"); } + [TestMethod] + public void EscapesFieldContainingCarriageReturn() + { + // RFC 4180: a lone CR must force quoting just like LF. Without it, "a\rb" would be + // written unquoted and mis-split by strict parsers. CsvBufferWriter already quoted CR; + // CsvWriter now matches it. 
+ var result = CsvWriter.WriteToText(["A"], [["a\rb"]]); + Assert.AreEqual($"A{Environment.NewLine}\"a\rb\"{Environment.NewLine}", result); + } + + [TestMethod] + public async Task EscapesFieldContainingCarriageReturn_Async() + { + var writer = new StringWriter(); + await CsvWriter.WriteAsync(writer, ["A"], [["a\rb"]]); + Assert.AreEqual($"A{Environment.NewLine}\"a\rb\"{Environment.NewLine}", writer.ToString()); + } + +#if NET8_0_OR_GREATER + [TestMethod] + public void EscapesFieldContainingCarriageReturn_MemoryPath() + { + var headers = new[] { "A".AsMemory() }; + var rows = new[] { new[] { "a\rb".AsMemory() } }; + Assert.AreEqual($"A{Environment.NewLine}\"a\rb\"{Environment.NewLine}", CsvWriter.WriteToText(headers, rows)); + } + + [TestMethod] + public void BufferWriterAndCsvWriterAgreeOnCarriageReturn() + { + using var buffer = new CsvBufferWriter(); + buffer.WriteRow(new[] { "a\rb".AsMemory() }); + StringAssert.Contains(buffer.ToString(), "\"a\rb\""); + } +#endif + //[TestMethod] //public void RowsNewLineEscapedValues() //{ diff --git a/Csv/CsvWriter.cs b/Csv/CsvWriter.cs index a749510..853a487 100644 --- a/Csv/CsvWriter.cs +++ b/Csv/CsvWriter.cs @@ -26,7 +26,13 @@ public static class CsvWriter // Keep the fixed escape chars cached and check the separator with a separate Contains. // Without this caching, MemoryExtensions.IndexOfAny(ReadOnlySpan<char>, ReadOnlySpan<char>)/char[] // builds a fresh SearchValues on the heap every call. - private static readonly SearchValues<char> FixedEscapeChars = SearchValues.Create("'\n"); + private static readonly SearchValues<char> FixedEscapeChars = SearchValues.Create("'\n\r"); +#else + // RFC 4180: a field must be quoted when it contains a quote, the separator, CR, or LF. + // Kept as a shared array (scanned alongside a separate separator check) so WriteLine + // doesn't allocate a per-row char[] on the netstandard2.0 path. NET8+ uses the + // vectorized SearchValues above instead. 
+ private static readonly char[] FixedEscapeCharsArray = { '\'', '\n', '\r' }; #endif /// @@ -468,7 +474,6 @@ public static async Task WriteToTextAsync(ReadOnlyMemory[]? header private static void WriteLine(TextWriter writer, string[] data, int columnCount, char separator) { - var escapeChars = new[] { separator, '\'', '\n' }; for (var i = 0; i < columnCount; i++) { if (i > 0) @@ -487,7 +492,11 @@ private static void WriteLine(TextWriter writer, string[] data, int columnCount, escape = true; cell = cell.Replace("\"", "\"\""); } - else if (cell.IndexOfAny(escapeChars) >= 0) +#if NET8_0_OR_GREATER + else if (cell.Contains(separator) || cell.AsSpan().IndexOfAny(FixedEscapeChars) >= 0) +#else + else if (cell.IndexOf(separator) >= 0 || cell.IndexOfAny(FixedEscapeCharsArray) >= 0) +#endif escape = true; if (escape) @@ -503,7 +512,6 @@ private static void WriteLine(TextWriter writer, string[] data, int columnCount, private static async Task WriteLineAsync(TextWriter writer, string[] data, int columnCount, char separator) { - var escapeChars = new[] { separator, '\'', '\n' }; for (var i = 0; i < columnCount; i++) { if (i > 0) @@ -555,7 +563,11 @@ private static async Task WriteLineAsync(TextWriter writer, string[] data, int c // Write closing quote await writer.WriteAsync('"').ConfigureAwait(false); } - else if (cell.IndexOfAny(escapeChars) >= 0) +#if NET8_0_OR_GREATER + else if (cell.Contains(separator) || cell.AsSpan().IndexOfAny(FixedEscapeChars) >= 0) +#else + else if (cell.IndexOf(separator) >= 0 || cell.IndexOfAny(FixedEscapeCharsArray) >= 0) +#endif { await writer.WriteAsync('"').ConfigureAwait(false); await writer.WriteAsync(cell).ConfigureAwait(false);