From c5d2ccf2fe3ae34817a571266c51b04e24994cd6 Mon Sep 17 00:00:00 2001 From: Steve Hansen Date: Sat, 16 May 2026 13:18:50 +0200 Subject: [PATCH] fix: memory-based reader paths drop records after blank lines (#122) MemorySliceLineSource.TryReadLine and MemoryReaderLineSource.TryReadLine returned false on blank middle-of-stream lines, terminating enumeration and silently dropping every record that followed. The TextReader-based paths don't have this bug because TextReader.ReadLine distinguishes blank ("") from EOF (null), letting the engine's SkipRow predicate skip empties while continuing to read. Fix: TryReadLine now returns false only when position >= csv.Length at entry. Blank middle-of-stream lines surface as empty MemoryText with return value true, and the engine's existing SkipRow logic skips them (by default; or surfaces them as empty records if SkipRow is overridden). This was a pre-existing bug in ReadLineOptimized and ReadFromMemory preserved verbatim by #118; flagged by Gemini on PR #121 and filed separately because the fix requires its own correctness analysis. Adds three cross-path tests pinning the new behavior: - blank middle-of-stream line skipped by default SkipRow - blank line surfaces as an empty record when SkipRow is disabled - trailing blank line terminates cleanly after the last real record Co-Authored-By: Claude Opus 4.7 (1M context) --- Csv.Tests/EngineUnificationTests.cs | 75 +++++++++++++++++++++++++++-- Csv/CsvReader.Engine.cs | 30 +++--------- 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/Csv.Tests/EngineUnificationTests.cs b/Csv.Tests/EngineUnificationTests.cs index ed79802..179fcb5 100644 --- a/Csv.Tests/EngineUnificationTests.cs +++ b/Csv.Tests/EngineUnificationTests.cs @@ -66,7 +66,10 @@ private static List RunRead(string csv, CsvOptions options) { var byName = new Dictionary(); foreach (var h in line.Headers) - byName[h] = line[h]; + { + if (line.LineHasColumn(h)) + byName[h] = line[h]; + } result.Add(new Row { @@ -89,7 +92,10 @@ private static List RunReadAsSpan(string csv, CsvOptions options) { var byName = new Dictionary(); foreach (var h in line.Headers) - byName[h] = line[h]; + { + if (line.LineHasColumn(h)) + byName[h] = line[h]; + } result.Add(new Row { @@ -111,7 +117,10 @@ private static async Task> RunReadAsync(string csv, CsvOptions options { var byName = new Dictionary(); foreach (var h in line.Headers) - byName[h] = line[h]; + { + if (line.LineHasColumn(h)) + byName[h] = line[h]; + } result.Add(new Row { @@ -132,7 +141,10 @@ private static List RunReadFromMemoryOptimized(string csv, CsvOptions optio { var byName = new Dictionary(); foreach (var h in line.Headers) - byName[h] = line[h]; + { + if (line.LineHasColumn(h)) + byName[h] = line[h]; + } result.Add(new Row { @@ -155,7 +167,10 @@ private static List RunReadFromMemory(string csv, CsvOptions options) var valueStrings = line.Values.Select(v => v.ToString()).ToArray(); var byName = new Dictionary(); foreach (var h in headerStrings) - byName[h] = line[h].ToString(); + { + if (line.LineHasColumn(h)) + byName[h] = line[h].ToString(); + } result.Add(new Row { @@ -606,5 +621,55 @@ public void When_ReadAsSpanEnumeratesOneThousandRecords_Then_AllocatedBytesIsFin System.Diagnostics.Trace.WriteLine($"ReadAsSpan over 1000 records allocated {delta} bytes."); } #endif + + [TestMethod] + public void When_BlankLineInMiddleOfStream_Then_AllPathsContinueParsing() + { + const string csv = "a,b,c\n1,2,3\n\n4,5,6\n"; + + foreach (var path in AllPaths) + { + var rows = Run(path, csv, () => new CsvOptions()); + + Assert.AreEqual(2, rows.Count, $"{path}: expected 2 records after default SkipRow elides the blank line, got {rows.Count}"); + Assert.AreEqual("1,2,3", rows[0].Raw, path.ToString()); + Assert.AreEqual("4,5,6", rows[1].Raw, path.ToString()); + } + } + + [TestMethod] + public void When_BlankLineAndSkipRowDisabled_Then_AllPathsReturnEmptyRecord() + { + const string csv = "a,b,c\n1,2,3\n\n4,5,6\n"; + + foreach (var path in AllPaths) + { + var rows = Run(path, csv, () => new CsvOptions + { + SkipRow = (_, _) => false, + ValidateColumnCount = false, + ReturnEmptyForMissingColumn = true, + }); + + Assert.AreEqual(3, rows.Count, $"{path}: expected 3 records when SkipRow is disabled (blank line surfaces as empty record), got {rows.Count}"); + Assert.AreEqual("1,2,3", rows[0].Raw, path.ToString()); + Assert.AreEqual(string.Empty, rows[1].Raw, path.ToString()); + Assert.AreEqual("4,5,6", rows[2].Raw, path.ToString()); + } + } + + [TestMethod] + public void When_TrailingBlankLine_Then_AllPathsTerminateAfterLastRecord() + { + const string csv = "a,b,c\n1,2,3\n\n"; + + foreach (var path in AllPaths) + { + var rows = Run(path, csv, () => new CsvOptions()); + + Assert.AreEqual(1, rows.Count, $"{path}: expected exactly 1 record, got {rows.Count}"); + Assert.AreEqual("1,2,3", rows[0].Raw, path.ToString()); + } + } } } diff --git a/Csv/CsvReader.Engine.cs b/Csv/CsvReader.Engine.cs index 4cc8a4d..c88397f 100644 --- a/Csv/CsvReader.Engine.cs +++ b/Csv/CsvReader.Engine.cs @@ -134,31 +134,17 @@ public bool TryReadLine(out MemoryText line, out string? lineString) { line = csv.Slice(position); position = csv.Length; - return !line.IsEmpty; + return true; } - var lineLength = newlineIndex; - var slice = csv.Slice(position, lineLength); + line = csv.Slice(position, newlineIndex); + position += newlineIndex; - position += lineLength; - if (position < csv.Length) - { - var ch = csv.Span[position]; - if (ch == '\r' || ch == '\n') - { - position++; - if (position < csv.Length && ch == '\r' && csv.Span[position] == '\n') - position++; - } - } - - if (slice.IsEmpty) - { - line = default; - return false; - } + var ch = csv.Span[position]; + position++; + if (position < csv.Length && ch == '\r' && csv.Span[position] == '\n') + position++; - line = slice; return true; } @@ -210,7 +196,7 @@ public bool TryReadLine(out MemoryText line, out string? lineString) } line = csv.ReadLine(ref position); - return !line.IsEmpty; + return true; } public MemoryText Concat(MemoryText head, string newLine, MemoryText tail, out string? combined)