|
| 1 | +using System.Runtime.Versioning; |
| 2 | +using System.Text.RegularExpressions; |
| 3 | +using LogExpert; |
| 4 | +using Moq; |
| 5 | +using NUnit.Framework; |
| 6 | + |
| 7 | +[assembly: SupportedOSPlatform("windows")] |
| 8 | +namespace RegexColumnizer.UnitTests; |
| 9 | + |
/// <summary>
/// Exercises <see cref="Regex1Columnizer"/> with a range of realistic and
/// edge-case regular expressions and checks the column values produced by
/// <c>SplitLine</c>.
/// </summary>
[TestFixture]
public class RegexColumnizerAdvancedParsingTests
{
    /// <summary>Name of the private field on <see cref="BaseRegexColumnizer"/> that holds the configuration.</summary>
    private const string ConfigFieldName = "_config";

    /// <summary>Parses a line in Apache access-log layout into eight named columns.</summary>
    [Test]
    public void SplitLine_ApacheAccessLog_ParsesCorrectly()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"^(?<ip>\S+)\s+\S+\s+(?<user>\S+)\s+\[(?<datetime>[^\]]+)\]\s+""(?<method>\S+)\s+(?<path>\S+)\s+(?<protocol>\S+)""\s+(?<status>\d+)\s+(?<size>\d+)");
        string logLine = @"192.168.1.1 - frank [10/Oct/2000:13:55:36 -0700] ""GET /apache_pb.gif HTTP/1.0"" 200 2326";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("192.168.1.1"));
        Assert.That(result.ColumnValues[1].Text, Is.EqualTo("frank"));
        Assert.That(result.ColumnValues[2].Text, Is.EqualTo("10/Oct/2000:13:55:36 -0700"));
        Assert.That(result.ColumnValues[3].Text, Is.EqualTo("GET"));
        Assert.That(result.ColumnValues[4].Text, Is.EqualTo("/apache_pb.gif"));
        Assert.That(result.ColumnValues[5].Text, Is.EqualTo("HTTP/1.0"));
        Assert.That(result.ColumnValues[6].Text, Is.EqualTo("200"));
        Assert.That(result.ColumnValues[7].Text, Is.EqualTo("2326"));
    }

    /// <summary>Parses a log4j-style line (timestamp, level, thread, logger, message).</summary>
    [Test]
    public void SplitLine_Log4jPattern_ParsesCorrectly()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"^(?<timestamp>\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2},\d{3})\s+(?<level>\w+)\s+\[(?<thread>[^\]]+)\]\s+(?<logger>\S+)\s+-\s+(?<message>.*)$");
        string logLine = "2023-11-21 14:30:45,123 ERROR [main] com.example.MyClass - An error occurred";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("2023-11-21 14:30:45,123"));
        Assert.That(result.ColumnValues[1].Text, Is.EqualTo("ERROR"));
        Assert.That(result.ColumnValues[2].Text, Is.EqualTo("main"));
        Assert.That(result.ColumnValues[3].Text, Is.EqualTo("com.example.MyClass"));
        Assert.That(result.ColumnValues[4].Text, Is.EqualTo("An error occurred"));
    }

    /// <summary>Splits a simple three-field comma-separated line.</summary>
    [Test]
    public void SplitLine_CsvPattern_ParsesCorrectly()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"^(?<col1>[^,]+),(?<col2>[^,]+),(?<col3>[^,]+)$");
        string logLine = "value1,value2,value3";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("value1"));
        Assert.That(result.ColumnValues[1].Text, Is.EqualTo("value2"));
        Assert.That(result.ColumnValues[2].Text, Is.EqualTo("value3"));
    }

    /// <summary>
    /// Checks the column layout when a named group sits inside an optional,
    /// unnamed group, both when the optional part is present and when it is absent.
    /// </summary>
    [Test]
    public void SplitLine_OptionalGroups_HandlesPresenceAndAbsence()
    {
        // Arrange - Pattern with optional group
        var columnizer = CreateColumnizer(@"^(?<required>\w+)(\s+(?<optional>\d+))?");

        // Act & Assert - Line with optional part
        var line1 = new TestLogLine(1, "text 123");
        var result1 = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), line1);

        // Note: .NET numbers unnamed capturing groups first and named groups after them,
        // so the unnamed outer group becomes column 0 even though it appears second in the pattern.
        Assert.That(result1.ColumnValues[0].Text, Is.EqualTo(" 123")); // unnamed outer group
        Assert.That(result1.ColumnValues[1].Text, Is.EqualTo("text")); // required group
        Assert.That(result1.ColumnValues[2].Text, Is.EqualTo("123")); // named inner group

        // Line without optional part - the pattern still matches because the outer group is optional
        var line2 = new TestLogLine(2, "text");
        var result2 = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), line2);
        Assert.That(result2.ColumnValues[0].Text, Is.Empty); // optional outer group not matched
        Assert.That(result2.ColumnValues[1].Text, Is.EqualTo("text")); // required group matched
        Assert.That(result2.ColumnValues[2].Text, Is.Empty); // optional inner group not matched
    }

    /// <summary>
    /// Captures a whole single-line input with a catch-all pattern.
    /// Only a single-line input is exercised here.
    /// </summary>
    [Test]
    public void SplitLine_MultilinePattern_SingleLineMode()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"(?<text>.*)");
        string logLine = "Single line of text";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("Single line of text"));
    }

    /// <summary>Extracts integer, decimal and hexadecimal tokens as text columns.</summary>
    [Test]
    public void SplitLine_NumericGroups_ExtractsValues()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"^(?<int>\d+)\s+(?<float>\d+\.\d+)\s+(?<hex>0x[0-9A-Fa-f]+)$");
        string logLine = "42 3.14 0xFF";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("42"));
        Assert.That(result.ColumnValues[1].Text, Is.EqualTo("3.14"));
        Assert.That(result.ColumnValues[2].Text, Is.EqualTo("0xFF"));
    }

    /// <summary>
    /// Uses an alternation of a quoted and an unquoted token and checks the
    /// first column, which holds the content of the quoted token.
    /// </summary>
    [Test]
    public void SplitLine_QuotedStrings_ExtractsContent()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"""(?<quoted>[^""]*)""|(?<unquoted>\S+)");
        string logLine = @"""quoted value"" unquoted";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert - First match
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("quoted value"));
    }

    /// <summary>Checks that a pattern containing a positive lookahead yields the first matching word.</summary>
    [Test]
    public void SplitLine_WithLookahead_ParsesCorrectly()
    {
        // Arrange - Pattern with positive lookahead
        var columnizer = CreateColumnizer(@"(?<word>\w+)(?=\s)");
        string logLine = "first second third";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert - Only captures first match
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("first"));
    }

    /// <summary>
    /// Runs a pattern that contains a named backreference (<c>\k&lt;quote&gt;</c>)
    /// and checks only that splitting completes and produces at least one column.
    /// </summary>
    [Test]
    public void SplitLine_BackreferencesNotSupported_ParsesWithoutError()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"(?<quote>['""])(?<text>.*?)\k<quote>");
        string logLine = @"'single quoted' and ""double quoted""";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert - Deliberately loose: only the presence of columns is checked, not their content
        Assert.That(result.ColumnValues.Length, Is.GreaterThan(0));
    }

    /// <summary>
    /// Matches an upper-case level keyword against an upper-case alternation.
    /// Despite the name, no mixed-case input is exercised (see the note in Arrange).
    /// </summary>
    [Test]
    public void SplitLine_CaseInsensitivePattern_MatchesRegardlessOfCase()
    {
        // Arrange - Note: RegexOptions would need to be configurable for true case-insensitive
        var columnizer = CreateColumnizer(@"(?<level>INFO|WARN|ERROR)");
        string logLine = "INFO message";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("INFO"));
    }

    /// <summary>Checks that nested named groups become columns in the order their opening parentheses appear.</summary>
    [Test]
    public void SplitLine_ComplexNestedGroups_ExtractsCorrectly()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"^(?<outer>(?<inner1>\w+)\s+(?<inner2>\d+))$");
        string logLine = "text 123";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("text 123")); // outer
        Assert.That(result.ColumnValues[1].Text, Is.EqualTo("text")); // inner1
        Assert.That(result.ColumnValues[2].Text, Is.EqualTo("123")); // inner2
    }

    /// <summary>
    /// Smoke-checks that splitting a 5000-character line stays under a coarse time limit.
    /// </summary>
    [Test]
    public void SplitLine_VeryLongLine_HandlesEfficiently()
    {
        // Arrange - Simple test for performance with long lines
        var columnizer = CreateColumnizer(@"(?<text>.*)");
        string logLine = new string('x', 5000); // Reduced to avoid potential timeouts
        var testLogLine = new TestLogLine(1, logLine);

        // Create the mock outside the timed region so proxy generation is not measured.
        var callback = Mock.Of<ILogLineColumnizerCallback>();

        // Warm-up call so one-time costs (JIT, first regex execution) do not count
        // against the time limit when this test happens to run first.
        _ = columnizer.SplitLine(callback, new TestLogLine(0, "warm-up"));

        // Act
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        var result = columnizer.SplitLine(callback, testLogLine);
        stopwatch.Stop();

        // Assert - Main goal is performance, not exact match
        Assert.That(stopwatch.ElapsedMilliseconds, Is.LessThan(100)); // Should be fast
        Assert.That(result.ColumnValues[0].Text.Length, Is.GreaterThan(1000)); // Should capture substantial portion
    }

    /// <summary>Checks that CJK, emoji and Cyrillic text pass through a catch-all group unchanged.</summary>
    [Test]
    public void SplitLine_UnicodeCharacters_HandlesCorrectly()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"(?<text>.*)");
        string logLine = "Hello 世界 🌍 Привет";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("Hello 世界 🌍 Привет"));
    }

    /// <summary>Checks that escaped square brackets in the pattern match literal brackets in the line.</summary>
    [Test]
    public void SplitLine_SpecialRegexCharacters_EscapedProperly()
    {
        // Arrange
        var columnizer = CreateColumnizer(@"(?<escaped>\[.*?\])");
        string logLine = "[content in brackets]";
        var testLogLine = new TestLogLine(1, logLine);

        // Act
        var result = columnizer.SplitLine(Mock.Of<ILogLineColumnizerCallback>(), testLogLine);

        // Assert
        Assert.That(result.ColumnValues[0].Text, Is.EqualTo("[content in brackets]"));
    }

    /// <summary>
    /// Builds a <see cref="Regex1Columnizer"/> configured with the given expression
    /// and calls <c>Init()</c> on it.
    /// </summary>
    /// <param name="regex">Regular expression to store in the columnizer configuration.</param>
    /// <returns>The initialised columnizer.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// The private configuration field could not be found on <see cref="BaseRegexColumnizer"/>.
    /// </exception>
    private static Regex1Columnizer CreateColumnizer(string regex)
    {
        var config = new RegexColumnizerConfig
        {
            Expression = regex,
            Name = "Test Columnizer"
        };

        var columnizer = new Regex1Columnizer();

        // The configuration is injected into a private field via reflection.
        var configField = typeof(BaseRegexColumnizer).GetField(
            ConfigFieldName,
            System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance);

        // Fail loudly if the field was renamed or removed; silently skipping the
        // assignment would make every test fail later with a misleading message.
        if (configField is null)
        {
            throw new System.InvalidOperationException(
                $"Private field '{ConfigFieldName}' was not found on {nameof(BaseRegexColumnizer)}; update the test helper.");
        }

        configField.SetValue(columnizer, config);

        columnizer.Init();

        return columnizer;
    }

    /// <summary>Minimal in-memory <see cref="ILogLine"/> used as test input.</summary>
    private sealed class TestLogLine : ILogLine
    {
        /// <summary>Creates a log line with the given number and content.</summary>
        /// <param name="lineNumber">Line number to report.</param>
        /// <param name="fullLine">Complete text of the line.</param>
        public TestLogLine(int lineNumber, string fullLine)
        {
            LineNumber = lineNumber;
            FullLine = fullLine;

            // Keep Text in step with FullLine so code reading either property sees the line content
            // instead of a null reference.
            Text = fullLine;
        }

        public string FullLine { get; set; }

        public int LineNumber { get; set; }

        public string Text { get; set; }
    }
}
0 commit comments