From b07c236e56cb7855608f82d1f34847b0065463ff Mon Sep 17 00:00:00 2001 From: Matthew Martin Date: Mon, 6 Apr 2026 18:04:45 -0400 Subject: [PATCH 1/2] attempt one --- src/bagit/__init__.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/bagit/__init__.py b/src/bagit/__init__.py index c17b1f2..6329b3f 100755 --- a/src/bagit/__init__.py +++ b/src/bagit/__init__.py @@ -131,9 +131,25 @@ def find_locale_dir(): #: Block size used when reading files for hashing: HASH_BLOCK_SIZE = 512 * 1024 + +def open_text_file(filename, mode="r", encoding="utf-8", errors="strict"): + # Open the underlying file in binary mode so the codec wrapper keeps codecs.open() behavior without its deprecation. + binary_mode = mode.replace("t", "") + if "b" not in binary_mode: + binary_mode += "b" + + stream = open(filename, binary_mode) + if "r" in mode and all(flag not in mode for flag in ("w", "a", "+")): + wrapped = codecs.getreader(encoding)(stream, errors=errors) + else: + wrapped = codecs.getwriter(encoding)(stream, errors=errors) + + wrapped.encoding = codecs.lookup(encoding).name.upper() + return wrapped + + #: Convenience function used everywhere we want to open a file to read text #: rather than undecoded bytes: -open_text_file = partial(codecs.open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: UNICODE_BYTE_ORDER_MARK = "\ufeff" @@ -1423,8 +1439,8 @@ def _encode_filename(s): def _decode_filename(s): - s = re.sub(r"%0D", "\r", s, re.IGNORECASE) - s = re.sub(r"%0A", "\n", s, re.IGNORECASE) + s = re.sub(r"%0D", "\r", s, flags=re.IGNORECASE) + s = re.sub(r"%0A", "\n", s, flags=re.IGNORECASE) return s From 808efe3cb6092bac94236dd84271dade9e69eec6 Mon Sep 17 00:00:00 2001 From: Matthew Martin Date: Mon, 6 Apr 2026 19:57:23 -0400 Subject: [PATCH 2/2] set utf-8, strict --- src/bagit/__init__.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/bagit/__init__.py b/src/bagit/__init__.py index 6329b3f..890dbd8 100755 --- a/src/bagit/__init__.py +++ b/src/bagit/__init__.py @@ -131,25 +131,9 @@ def find_locale_dir(): #: Block size used when reading files for hashing: HASH_BLOCK_SIZE = 512 * 1024 - -def open_text_file(filename, mode="r", encoding="utf-8", errors="strict"): - # Open the underlying file in binary mode so the codec wrapper keeps codecs.open() behavior without its deprecation. - binary_mode = mode.replace("t", "") - if "b" not in binary_mode: - binary_mode += "b" - - stream = open(filename, binary_mode) - if "r" in mode and all(flag not in mode for flag in ("w", "a", "+")): - wrapped = codecs.getreader(encoding)(stream, errors=errors) - else: - wrapped = codecs.getwriter(encoding)(stream, errors=errors) - - wrapped.encoding = codecs.lookup(encoding).name.upper() - return wrapped - - #: Convenience function used everywhere we want to open a file to read text #: rather than undecoded bytes: +open_text_file = partial(open, encoding="utf-8", errors="strict") # This is the same as decoding the byte values in codecs.BOM: UNICODE_BYTE_ORDER_MARK = "\ufeff"