Skip to content
This repository was archived by the owner on May 15, 2026. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion openhands_aci/editor/editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,18 @@ def validate_file(self, path: Path) -> None:
if self.is_supported_binary_file(path):
return

# Check file type
# Try encoding-based text detection first (handles CJK/multi-byte UTF-8)
try:
encoding = self._encoding_manager.get_encoding(path)
with open(path, 'r', encoding=encoding) as f:
chunk = f.read(8192)
# Null characters indicate binary content even if decoding succeeded
if '\x00' not in chunk:
return # Successfully decoded as text — not binary
except (UnicodeDecodeError, ValueError, OSError, TypeError):
pass # Fall through to binaryornot check

# Fallback to binaryornot
if is_binary(str(path)):
raise FileValidationError(
path=str(path),
Expand Down
9 changes: 9 additions & 0 deletions tests/integration/test_oh_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,3 +699,12 @@ def test_str_replace_and_insert_snippet_output_on_a_large_file(editor):
new_str='Inserted line at 500',
)
assert ' 500\tInserted line at 500' in result.output


def test_view_dense_chinese_markdown(tmp_path):
    """Check that `view` returns the text of a UTF-8 markdown file packed with CJK characters."""
    markdown_path = tmp_path / 'test.md'
    # Repeat the phrase so the file consists almost entirely of multi-byte UTF-8 sequences.
    markdown_path.write_text('中文测试内容。' * 50, encoding='utf-8')

    view_result = OHEditor()(command='view', path=str(markdown_path))

    assert '中文测试内容' in view_result.output
8 changes: 8 additions & 0 deletions tests/unit/test_file_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,11 @@ def test_validate_image_file():
editor.validate_file(image_file)

assert 'file appears to be binary' in str(exc_info.value).lower()


def test_validate_dense_cjk_utf8_not_binary(tmp_path):
    """Check that validate_file accepts a UTF-8 file made up of dense CJK text."""
    cjk_path = tmp_path / 'chinese.md'
    cjk_path.write_text('中文测试内容。' * 50, encoding='utf-8')

    # No explicit assertion: the test passes as long as this call does not raise.
    OHEditor().validate_file(cjk_path)