Skip to content

Commit 805a1b8

Browse files
committed
test: add CRLF handling tests for decoder and scanner
1 parent b952969 commit 805a1b8

2 files changed

Lines changed: 114 additions & 0 deletions

File tree

tests/test_decoder.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,3 +140,58 @@ def test_object_key_order_preserved(self):
140140
assert keys == ["z", "a", "m", "b"]
141141
# Verify order is not alphabetical
142142
assert keys != ["a", "b", "m", "z"]
143+
144+
145+
class TestCRLFDecoding:
    """Test CRLF (Windows) line ending handling in decoder.

    Every fixture spells its line endings out explicitly (``\\r\\n``, ``\\r``
    or ``\\n``). Nested lines are indented with two spaces per level so the
    documents stay valid when indentation is validated strictly.
    """

    def test_decode_object_with_crlf(self):
        """Test decoding objects with CRLF line endings."""
        toon = "name: Alice\r\nage: 30\r\n"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30}

    def test_decode_nested_object_with_crlf(self):
        """Test decoding nested objects with CRLF line endings."""
        toon = "person:\r\n  name: Alice\r\n  age: 30\r\n"
        result = decode(toon)
        assert result == {"person": {"name": "Alice", "age": 30}}

    def test_decode_array_with_crlf(self):
        """Test decoding arrays with CRLF line endings."""
        toon = "items[3]:\r\n  - apple\r\n  - banana\r\n  - cherry\r\n"
        result = decode(toon)
        assert result == {"items": ["apple", "banana", "cherry"]}

    def test_decode_delimited_array_with_crlf(self):
        """Test decoding delimited arrays with CRLF line endings."""
        toon = "items[3]: apple,banana,cherry\r\n"
        result = decode(toon)
        assert result == {"items": ["apple", "banana", "cherry"]}

    def test_decode_with_old_mac_cr(self):
        """Test decoding with old Mac CR line endings."""
        toon = "name: Alice\rage: 30\r"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30}

    def test_decode_with_mixed_line_endings(self):
        """Test decoding with mixed line endings."""
        toon = "name: Alice\r\nage: 30\ncity: NYC\r"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30, "city": "NYC"}

    def test_crlf_does_not_affect_quoted_strings(self):
        """Test that CRLF normalization doesn't affect escaped \\r in strings."""
        # Inside the quotes the input holds the two-character escapes
        # backslash-r and backslash-n; only the final CRLF is a line ending.
        toon = 'text: "line1\\r\\nline2"\r\n'
        result = decode(toon)
        # The escapes must come back as real CR and LF characters in the value.
        assert result == {"text": "line1\r\nline2"}

    def test_crlf_in_strict_mode(self):
        """Test CRLF works correctly in strict mode."""
        # Two-space indentation: a one-space indent is not a whole level.
        toon = "name:\r\n  first: Alice\r\n  age: 30\r\n"
        options = DecodeOptions(strict=True)
        result = decode(toon, options)
        assert result == {"name": {"first": "Alice", "age": 30}}
197+

tests/test_scanner.py

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,3 +241,62 @@ def test_blank_lines_not_validated_in_strict_mode(self):
241241
# Should not raise error for blank line with invalid indentation
242242
assert len(blanks) == 1
243243
assert blanks[0].line_num == 2
244+
245+
246+
class TestCRLFHandling:
    """Tests for CRLF and CR normalization.

    Each test calls ``to_parsed_lines(source, 2, strict)``, i.e. with an
    indent size of 2, so nested fixture lines use two spaces per level.
    """

    def test_crlf_normalization(self):
        """Test Windows CRLF line endings are normalized to LF."""
        source = "name: Alice\r\nage: 30\r\n"
        lines, _ = to_parsed_lines(source, 2, False)
        # Check the count first so a wrong split fails here, not on indexing.
        assert len(lines) == 3  # name, age, and trailing empty line
        # Verify no \r remains in content
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[0].content == "name: Alice"
        assert lines[1].content == "age: 30"

    def test_standalone_cr_normalization(self):
        """Test old Mac CR line endings are normalized to LF."""
        source = "name: Alice\rage: 30\r"
        lines, _ = to_parsed_lines(source, 2, False)
        # Check the count first so a wrong split fails here, not on indexing.
        assert len(lines) == 3  # name, age, and trailing empty line
        # Verify no \r remains in content
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[0].content == "name: Alice"
        assert lines[1].content == "age: 30"

    def test_mixed_line_endings(self):
        """Test mixed line endings are all normalized."""
        source = "line1\r\nline2\nline3\rline4"
        lines, _ = to_parsed_lines(source, 2, False)
        assert len(lines) == 4
        for line in lines:
            assert "\r" not in line.content
        assert lines[0].content == "line1"
        assert lines[1].content == "line2"
        assert lines[2].content == "line3"
        assert lines[3].content == "line4"

    def test_crlf_with_indentation(self):
        """Test CRLF handling preserves indentation."""
        # The child line carries exactly two leading spaces, which is what
        # the indent == 2 / depth == 1 assertions below expect.
        source = "parent:\r\n  child: value\r\n"
        lines, _ = to_parsed_lines(source, 2, False)
        assert lines[0].content == "parent:"
        assert lines[0].depth == 0
        assert lines[1].content == "child: value"
        assert lines[1].depth == 1
        assert lines[1].indent == 2

    def test_crlf_in_strict_mode(self):
        """Test CRLF normalization works in strict mode."""
        # Two leading spaces = one full level at indent size 2, as required
        # for the depth == 1 assertion.
        source = "name: Alice\r\n  age: 30\r\n"
        lines, _ = to_parsed_lines(source, 2, True)
        # Should not raise error and should properly normalize
        assert len(lines) == 3
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[1].depth == 1
302+

0 commit comments

Comments
 (0)