diff --git a/Csv/CsvReader.Engine.cs b/Csv/CsvReader.Engine.cs index e85287a..9a57e0b 100644 --- a/Csv/CsvReader.Engine.cs +++ b/Csv/CsvReader.Engine.cs @@ -35,7 +35,7 @@ internal interface IAsyncLineSource internal interface IRowFactory where TRow : class { - TRow Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, CsvOptions options); + TRow Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, IList? rawSplit, CsvOptions options); } internal readonly struct TextReaderLineSource : ILineSource @@ -188,22 +188,28 @@ public MemoryText Concat(MemoryText head, string newLine, MemoryText tail, out s internal readonly struct StringRowFactory : IRowFactory { - public ReadLine Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, CsvOptions options) + public ReadLine Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, IList? rawSplit, CsvOptions options) { #if NET8_0_OR_GREATER - return new ReadLine(headers, headerLookup, index, rawString ?? raw.ToString(), options); + var row = new ReadLine(headers, headerLookup, index, rawString ?? raw.ToString(), options); #else - return new ReadLine(headers, headerLookup, index, rawString ?? raw, options); + var row = new ReadLine(headers, headerLookup, index, rawString ?? raw, options); #endif + if (rawSplit != null) + row.rawSplitLine = rawSplit; + return row; } } #if NET8_0_OR_GREATER internal readonly struct SpanRowFactory : IRowFactory { - public ReadLineSpan Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, CsvOptions options) + public ReadLineSpan Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, IList? rawSplit, CsvOptions options) { - return new ReadLineSpan(headers, headerLookup, index, rawString ?? raw.ToString(), options); + var row = new ReadLineSpan(headers, headerLookup, index, rawString ?? raw.ToString(), options); + if (rawSplit != null) + row.rawSplitLine = rawSplit; + return row; } } @@ -216,17 +222,23 @@ public OptimizedRowFactory(CsvMemoryOptions memoryOptions) this.memoryOptions = memoryOptions; } - public ReadLineSpanOptimized Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, CsvOptions options) + public ReadLineSpanOptimized Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, IList? rawSplit, CsvOptions options) { - return new ReadLineSpanOptimized(headers, headerLookup, index, raw, options, memoryOptions); + var row = new ReadLineSpanOptimized(headers, headerLookup, index, raw, options, memoryOptions); + if (rawSplit != null) + row.rawSplitLine = rawSplit; + return row; } } internal readonly struct MemoryRowFactory : IRowFactory { - public ReadLineFromMemory Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, CsvOptions options) + public ReadLineFromMemory Create(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, string? rawString, IList? rawSplit, CsvOptions options) { - return new ReadLineFromMemory(headers, headerLookup, index, raw, options); + var row = new ReadLineFromMemory(headers, headerLookup, index, raw, options); + if (rawSplit != null) + row.rawSplitLine = rawSplit; + return row; } } #endif @@ -249,6 +261,8 @@ private static IEnumerable Enumerate(TSource sour if (index <= options.RowsToSkip || options.SkipRow?.Invoke(line, index) == true) continue; + IList? rawSplit = null; + if (headers == null || headerLookup == null) { InitializeOptions(line.AsSpan(), options); @@ -259,15 +273,15 @@ private static IEnumerable Enumerate(TSource sour // case via index == RowsToSkip + 1 and skips its own multiline pass to avoid double-reading. if (!skipInitialLine && options.AllowNewLineInEnclosedFieldValues) { - var splitLine = options.Splitter.Split(line, options); + rawSplit = options.Splitter.Split(line, options); - while (splitLine.Count > 0 && CsvLineSplitter.IsUnterminatedQuotedValue(splitLine[splitLine.Count - 1].AsSpan(), options)) + while (rawSplit.Count > 0 && CsvLineSplitter.IsUnterminatedQuotedValue(rawSplit[rawSplit.Count - 1].AsSpan(), options)) { if (!source.TryReadLine(out var nextLine, out _)) break; line = source.Concat(line, options.NewLine, nextLine, out lineString); - splitLine = options.Splitter.Split(line, options); + rawSplit = options.Splitter.Split(line, options); } } @@ -314,18 +328,18 @@ private static IEnumerable Enumerate(TSource sour var isFirstDataLineInHeaderAbsentMode = options.HeaderMode == HeaderMode.HeaderAbsent && index == (options.RowsToSkip + 1); if (options.AllowNewLineInEnclosedFieldValues && !isFirstDataLineInHeaderAbsentMode) { - var rawSplit = options.Splitter.Split(line, options); + rawSplit = options.Splitter.Split(line, options, headers!.Length); while (rawSplit.Count > 0 && CsvLineSplitter.IsUnterminatedQuotedValue(rawSplit[rawSplit.Count - 1].AsSpan(), options)) { if (!source.TryReadLine(out var nextLine, out _)) break; line = source.Concat(line, options.NewLine, nextLine, out lineString); - rawSplit = options.Splitter.Split(line, options); + rawSplit = options.Splitter.Split(line, options, headers!.Length); } } - yield return factory.Create(headers, headerLookup, index, line, lineString, options); + yield return factory.Create(headers, headerLookup, index, line, lineString, rawSplit, options); } } @@ -352,6 +366,8 @@ private static async IAsyncEnumerable EnumerateAsync? rawSplit = null; + if (headers == null || headerLookup == null) { InitializeOptions(line.AsSpan(), options); @@ -362,16 +378,16 @@ private static async IAsyncEnumerable EnumerateAsync 0 && CsvLineSplitter.IsUnterminatedQuotedValue(splitLine[splitLine.Count - 1].AsSpan(), options)) + while (rawSplit.Count > 0 && CsvLineSplitter.IsUnterminatedQuotedValue(rawSplit[rawSplit.Count - 1].AsSpan(), options)) { var (nextOk, nextLine, _) = await source.TryReadLineAsync(ct).ConfigureAwait(false); if (!nextOk) break; line = source.Concat(line, options.NewLine, nextLine, out lineString); - splitLine = options.Splitter.Split(line, options); + rawSplit = options.Splitter.Split(line, options); } } @@ -418,7 +434,7 @@ private static async IAsyncEnumerable EnumerateAsync 0 && CsvLineSplitter.IsUnterminatedQuotedValue(rawSplit[rawSplit.Count - 1].AsSpan(), options)) { var (nextOk, nextLine, _) = await source.TryReadLineAsync(ct).ConfigureAwait(false); @@ -426,11 +442,11 @@ private static async IAsyncEnumerable EnumerateAsync headerLookup; private readonly CsvOptions options; - private IList? rawSplitLine; + internal IList? rawSplitLine; private MemoryText[]? parsedLine; public ReadLineFromMemory(MemoryText[] headers, Dictionary headerLookup, int index, MemoryText raw, CsvOptions options) diff --git a/Csv/CsvReader.cs b/Csv/CsvReader.cs index 5a14f71..b3cb739 100644 --- a/Csv/CsvReader.cs +++ b/Csv/CsvReader.cs @@ -450,7 +450,7 @@ internal sealed class ReadLine : ICsvLine private readonly Dictionary headerLookup; private readonly CsvOptions options; private readonly MemoryText[] headers; - private IList? rawSplitLine; + internal IList? rawSplitLine; internal MemoryText[]? parsedLine; public ReadLine(MemoryText[] headers, Dictionary headerLookup, int index, string raw, CsvOptions options) @@ -551,7 +551,7 @@ internal sealed class ReadLineSpan : ICsvLineSpan private readonly Dictionary headerLookup; private readonly CsvOptions options; private readonly MemoryText[] headers; - private IList? rawSplitLine; + internal IList? rawSplitLine; internal MemoryText[]? parsedLine; public ReadLineSpan(MemoryText[] headers, Dictionary headerLookup, int index, string raw, CsvOptions options) @@ -694,7 +694,7 @@ internal sealed class ReadLineSpanOptimized : ICsvLineSpan private readonly CsvMemoryOptions memoryOptions; private readonly ReadOnlyMemory[] headers; private readonly ReadOnlyMemory rawMemory; - private IList>? rawSplitLine; + internal IList>? rawSplitLine; private ReadOnlyMemory[]? parsedLine; public ReadLineSpanOptimized(ReadOnlyMemory[] headers, Dictionary headerLookup, int index, ReadOnlyMemory raw, CsvOptions options, CsvMemoryOptions memoryOptions)