Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,52 @@ def test_parse_again(self):
self.assertEqual(expat.ErrorString(cm.exception.code),
expat.errors.XML_ERROR_FINISHED)

@support.subTests("encoding", ("utf-8", "utf-16"))
def test_parse_reentrancy_with_encoding(self, encoding):
    # Calling Parse() on a parser from inside one of its own handlers
    # must raise RuntimeError.
    # See https://github.com/python/cpython/issues/146169.
    parser = expat.ParserCreate(encoding=encoding)

    def reenter(data):
        # Re-entrant call on the very parser that invoked this handler.
        return parser.Parse(data, False)

    # Wrap the handler in a mock so the call it received can be checked.
    char_handler = mock.Mock(wraps=reenter)

    def install_char_handler(name, attrs):
        # The character data handler is only installed once the start
        # element has been reported.
        parser.CharacterDataHandler = char_handler

    parser.StartElementHandler = install_char_handler

    payload = "<a>x".encode(encoding)
    final_index = len(payload) - 1
    expected = re.escape("cannot call Parse() from within a handler")
    with self.assertRaisesRegex(RuntimeError, expected):
        # Feed the document one byte at a time; only the last chunk is
        # flagged as final.
        for pos in range(len(payload)):
            chunk = payload[pos:pos + 1]
            parser.Parse(chunk, pos == final_index)
    char_handler.assert_called_once_with("x")

@support.subTests("encoding", ("utf-8", "utf-16"))
def test_parse_reentrancy_allowed_for_external_parser(self, encoding):
parser = expat.ParserCreate(encoding=encoding)
subparser = parser.ExternalEntityParserCreate(None, encoding)
payload_extstr = '<!ENTITY ext SYSTEM "entity.file">'

def ExternalEntityRefHandler(*args):
subparser.Parse(payload_extstr, True)
return 1 # return an integer to indicate that parsing continues
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment seems to need fixing:

Suggested change
return 1 # return an integer to indicate that parsing continues
return 1 # return a non-zero integer to indicate that parsing continues

ExternalEntityRefHandler = mock.Mock(wraps=ExternalEntityRefHandler)

def StartElementHandler(*args):
parser.ExternalEntityRefHandler = ExternalEntityRefHandler
parser.StartElementHandler = StartElementHandler

payload = f"""\
<?xml version="1.0" standalone="no"?>
<!DOCTYPE quotations SYSTEM "quotations.dtd" [{payload_extstr}]>
<root>&ext;</root>
""".encode(encoding)
Comment on lines +312 to +316
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just an idea: Use of dedent could help with readability here:

Suggested change
payload = f"""\
<?xml version="1.0" standalone="no"?>
<!DOCTYPE quotations SYSTEM "quotations.dtd" [{payload_extstr}]>
<root>&ext;</root>
""".encode(encoding)
payload = dedent(f"""\
<?xml version="1.0" standalone="no"?>
<!DOCTYPE quotations SYSTEM "quotations.dtd" [{payload_extstr}]>
<root>&ext;</root>
""").encode(encoding)


# Check that external parsers can be called from parent's handlers.
for i in range(len(payload)):
parser.Parse(payload[i:i+1], i == len(payload) - 1)
external_ref_args = ('ext', None, 'entity.file', None)
ExternalEntityRefHandler.assert_called_once_with(*external_ref_args)


class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
# Tests that make sure we get errors when the namespace_separator value
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:mod:`xml.parsers.expat`: raise :exc:`RuntimeError` when an Expat handler
calls :meth:`parser.Parse <xml.parsers.expat.xmlparser.Parse>` on the parser
that called the handler. Patch by Bénédikt Tran.
6 changes: 6 additions & 0 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,12 @@ pyexpat_xmlparser_Parse_impl(xmlparseobject *self, PyTypeObject *cls,
int rc;
pyexpat_state *state = PyType_GetModuleState(cls);

if (self->in_callback) {
PyErr_SetString(PyExc_RuntimeError,
"cannot call Parse() from within a handler");
return NULL;
}

Comment on lines +866 to +871
Copy link
Copy Markdown
Contributor

@hartwork hartwork Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does ParseFile or function pyexpat_xmlparser_ParseFile_impl need something similar?

PS: Also — unlike pyexpat_xmlparser_Parse_impl — it does not seem to call XML_SetEncoding. That asymmetry seems unintended (but maybe I am missing something). Maybe such a call is missing.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh maybe, I will check. As for XML_SetEncoding, I don't know. I'll have a look at it tomorrow!

if (PyUnicode_Check(data)) {
view.buf = NULL;
s = PyUnicode_AsUTF8AndSize(data, &slen);
Expand Down
Loading