From 9060b45465157e7997cabff50b2488e2fc2d5973 Mon Sep 17 00:00:00 2001 From: venti <1308199824@qq.com> Date: Sat, 30 May 2026 16:42:08 +0800 Subject: [PATCH] fix: guard against missing oMath element in DOCX math converter (#1979) Add a None check in _convert_omath_to_latex() to return an empty string when the math element is missing, instead of passing None to oMath2Latex() which raises TypeError. Fixes #1979 --- .../converter_utils/docx/pre_process.py | 2 + packages/markitdown/tests/test_module_misc.py | 63 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py b/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py index d6fa8db69..257f0387f 100644 --- a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py +++ b/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py @@ -44,6 +44,8 @@ def _convert_omath_to_latex(tag: Tag) -> str: math_root = ET.fromstring(MATH_ROOT_TEMPLATE.format(str(tag))) # Find the 'oMath' element within the XML document math_element = math_root.find(OMML_NS + "oMath") + if math_element is None: + return "" # Convert the 'oMath' element to LaTeX using the oMath2Latex function latex = oMath2Latex(math_element).latex return latex diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py index 4d62e4919..bce508790 100644 --- a/packages/markitdown/tests/test_module_misc.py +++ b/packages/markitdown/tests/test_module_misc.py @@ -274,6 +274,69 @@ def test_docx_equations() -> None: assert block_equations, "No block equations found in the document." 
+def test_docx_malformed_equations() -> None:
+    """Empty oMathPara/oMath elements must not crash DOCX conversion (issue #1979)."""
+    import zipfile
+    from io import BytesIO
+
+    docx_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math">
+  <w:body>
+    <w:p>
+      <w:r><w:t>Normal text</w:t></w:r>
+    </w:p>
+    <w:p>
+      <m:oMath>
+        <m:r><m:t>x+1</m:t></m:r>
+      </m:oMath>
+    </w:p>
+    <w:p>
+      <w:r><w:t>After good equation</w:t></w:r>
+    </w:p>
+    <w:p>
+      <m:oMathPara></m:oMathPara>
+    </w:p>
+    <w:p>
+      <w:r><w:t>After empty oMathPara</w:t></w:r>
+    </w:p>
+    <w:p>
+      <m:oMath></m:oMath>
+    </w:p>
+    <w:p>
+      <w:r><w:t>After empty inline oMath</w:t></w:r>
+    </w:p>
+  </w:body>
+</w:document>"""
+
+    buf = BytesIO()  # NOTE(review): XML literals in this test were rebuilt from a markup-stripped copy of the patch - confirm against upstream
+    with zipfile.ZipFile(buf, "w") as z:  # assemble the DOCX (zip) package entirely in memory
+        z.writestr("word/document.xml", docx_xml.encode("utf-8"))
+        z.writestr("[Content_Types].xml", """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
+  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
+  <Default Extension="xml" ContentType="application/xml"/>
+  <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
+</Types>""")
+        z.writestr("word/_rels/document.xml.rels", """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+</Relationships>""")
+        z.writestr("_rels/.rels", """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
+</Relationships>""")
+
+    buf.seek(0)  # rewind: the zip writer leaves the stream position at the end
+    markitdown = MarkItDown()
+    result = markitdown.convert(buf)
+    assert "Normal text" in result.text_content
+    assert "$x+1$" in result.text_content  # substring check also matches the block form "$$x+1$$"
+    assert "After empty oMathPara" in result.text_content  # text after the empty oMathPara survived
+    assert "After empty inline oMath" in result.text_content  # text after the empty oMath survived
+
+
 def test_input_as_strings() -> None:
     markitdown = MarkItDown()