From e28c5804e80680bb010bc630f6f9a470f19a5739 Mon Sep 17 00:00:00 2001 From: Ayush Pramanik Date: Sun, 31 May 2026 00:38:34 -0400 Subject: [PATCH] fix: catch UnicodeDecodeError in IpynbConverter.accepts() for non-ASCII files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a non-ASCII file (e.g. a French PDF) has a JSON MIME type, the decode call in accepts() would raise UnicodeDecodeError and crash the entire conversion pipeline. accepts() should never raise — return False instead when the content cannot be decoded. Fixes #1894. Co-Authored-By: Claude Sonnet 4.6 --- .../markitdown/src/markitdown/converters/_ipynb_converter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py index b15e77aa2..2dd476112 100644 --- a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py +++ b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py @@ -38,6 +38,8 @@ def accepts( "nbformat" in notebook_content and "nbformat_minor" in notebook_content ) + except (UnicodeDecodeError, ValueError): + return False finally: file_stream.seek(cur_pos)