Skip to content

Commit 610e380

Browse files
committed
utils: is_binary(): drop check for \xff in byte string
Instead, rely on checking only for null bytes in the initial chunk of the file. Also revert the decoding confidence setting to fix tests.
1 parent 60fcb1b commit 610e380

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

src/pkgcheck/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def is_binary(path, blocksize=1024):
91 91

92 92
# guess character encoding using chardet
93 93
detected_encoding = chardet.detect(byte_str)
94-
if detected_encoding['confidence'] > 0.6:
94+
if detected_encoding['confidence'] > 0.8:
95 95
try:
96 96
byte_str.decode(encoding=detected_encoding['encoding'])
97 97
decodable = True
@@ -101,6 +101,6 @@ def is_binary(path, blocksize=1024):
101 101
# finally use all the checks to decide binary or text
102 102
if decodable:
103 103
return False
104-
if is_likely_binary or b'\x00' in byte_str or b'\xff' in byte_str:
104+
if is_likely_binary or b'\x00' in byte_str:
105 105
return True
106 106
return False

0 commit comments

Comments
 (0)