From e28c5804e80680bb010bc630f6f9a470f19a5739 Mon Sep 17 00:00:00 2001
From: Ayush Pramanik <ayushpramanik2007@gmail.com>
Date: Sun, 31 May 2026 00:38:34 -0400
Subject: [PATCH] fix: catch UnicodeDecodeError in IpynbConverter.accepts() for
 non-ASCII files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a non-ASCII file (e.g. a French PDF) has a JSON MIME type, the
decode call in accepts() raises UnicodeDecodeError and crashes the
entire conversion pipeline. accepts() should never raise — return False
instead when the content cannot be decoded. The new handler catches
UnicodeDecodeError and, more broadly, any ValueError raised inside the
same try block. Fixes #1894.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../markitdown/src/markitdown/converters/_ipynb_converter.py    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py
index b15e77aa2..2dd476112 100644
--- a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py
@@ -38,6 +38,8 @@ def accepts(
                         "nbformat" in notebook_content
                         and "nbformat_minor" in notebook_content
                     )
+                except (UnicodeDecodeError, ValueError):
+                    return False
                 finally:
                     file_stream.seek(cur_pos)