Skip to content

Commit d433544

Browse files
committed
helpers/lxml: fix the edge case with self-closing OBJECT tags
Closes #117: Edge case: self-closing <OBJECT> tags break the subsequent HTML markup
1 parent: 84b740e; commit: d433544

4 files changed

Lines changed: 116 additions & 11 deletions

File tree

reqif/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.0.32"
1+
__version__ = "0.0.33"

reqif/helpers/lxml.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,13 @@ def _lxml_stringify_reqif_ns_node(node):
6565
output += f"<{tag}"
6666
for attribute, attribute_value in node.attrib.items():
6767
output += f' {attribute}="{lxml_escape_for_html(attribute_value)}"'
68-
if node.text is not None or len(node.getchildren()) > 0:
68+
# <object> is surprisingly a tag that must have a closing tag even
69+
# if it is empty. If self-closed, it breaks all the following markup.
70+
if (
71+
node.text is not None
72+
or len(node.getchildren()) > 0
73+
or node.tag.casefold() == "object"
74+
):
6975
output += ">"
7076
if node.text is not None:
7177
output += lxml_escape_for_html(node.text)
@@ -97,7 +103,13 @@ def lxml_stringify_node(node):
97103
output += f"<{tag}"
98104
for attribute, attribute_value in node.attrib.items():
99105
output += f' {attribute}="{lxml_escape_for_html(attribute_value)}"'
100-
if node.text is not None or len(node.getchildren()) > 0:
106+
# <object> is surprisingly a tag that must have a closing tag even if it
107+
# is empty. If self-closed, it breaks all the following markup.
108+
if (
109+
node.text is not None
110+
or len(node.getchildren()) > 0
111+
or node.tag.casefold() == "object"
112+
):
101113
output += ">"
102114
if node.text is not None:
103115
output += lxml_escape_for_html(node.text)
@@ -120,12 +132,7 @@ def lxml_stringify_children(node):
120132
chain(
121133
*(
122134
(
123-
tostring(
124-
child,
125-
encoding=str,
126-
with_tail=False,
127-
pretty_print=False,
128-
),
135+
lxml_stringify_node(child),
129136
child.tail,
130137
)
131138
for child in node.getchildren()

reqif/parsers/spec_object_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,7 @@ def parse(spec_object_xml) -> ReqIFSpecObject:
207207
elif attribute_xml.tag == "ATTRIBUTE-VALUE-XHTML":
208208
the_value = attribute_xml.find("THE-VALUE")
209209

210-
# Edge: There are not <xhtml:...> or <reqif-xhtml...> tags.
210+
# Edge case: There are no <xhtml:...> or <reqif-xhtml...> tags.
211211
if len(the_value.nsmap) > 0:
212212
attribute_value = lxml_stringify_namespaced_children(
213213
the_value

tests/unit/reqif/parsers/test_spec_object_parser_strip_xhtml.py

Lines changed: 99 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
from reqif.parser import ReqIFParser
22

33

4-
def test_01_indenting_unintended_xhtml():
4+
def test_01_normal_case():
55
input_reqif = """\
66
<?xml version="1.0" encoding="UTF-8"?>
77
<REQ-IF xmlns="http://www.omg.org/spec/ReqIF/20110401/reqif.xsd" xmlns:configuration="http://eclipse.org/rmf/pror/toolextensions/1.0" xmlns:xhtml="http://www.w3.org/1999/xhtml">
@@ -48,3 +48,101 @@ def test_01_indenting_unintended_xhtml():
4848
].value_stripped_xhtml
4949
== expected_stripped_xhtml
5050
)
51+
52+
53+
def test_02_edge_case_object_tag_no_xhtml_ns():
54+
input_reqif = """\
55+
<?xml version="1.0" encoding="UTF-8"?>
56+
<REQ-IF xmlns="http://www.omg.org/spec/ReqIF/20110401/reqif.xsd" xmlns:configuration="http://eclipse.org/rmf/pror/toolextensions/1.0" xmlns:xhtml="http://www.w3.org/1999/xhtml">
57+
<CORE-CONTENT>
58+
<REQ-IF-CONTENT>
59+
<SPEC-OBJECTS>
60+
<SPEC-OBJECT IDENTIFIER="TEST_SPEC_OBJECT_IDENTIFIER" LAST-CHANGE="2021-10-15T11:34:36.007+02:00">
61+
<VALUES>
62+
<ATTRIBUTE-VALUE-XHTML>
63+
<DEFINITION>
64+
<ATTRIBUTE-DEFINITION-XHTML-REF>_gFhrXWojEeuExICsU7Acmg</ATTRIBUTE-DEFINITION-XHTML-REF>
65+
</DEFINITION>
66+
<THE-VALUE>
67+
<xhtml:div>
68+
<object data="files/rmf-1b18ef37-ca1f-4e79-954d-747df083f861_DOCATTACHMENT_3-screenshot-20220926-051927.png" name="DOCATTACHMENT_3-screenshot-20220926-051927.png" type="image/png"/>
69+
</xhtml:div>
70+
</THE-VALUE>
71+
</ATTRIBUTE-VALUE-XHTML>
72+
</VALUES>
73+
<TYPE>
74+
<SPEC-OBJECT-TYPE-REF>TEST_SPEC_OBJECT_TYPE_IDENTIFIER_FUNCTIONAL</SPEC-OBJECT-TYPE-REF>
75+
</TYPE>
76+
</SPEC-OBJECT>
77+
</SPEC-OBJECTS>
78+
</REQ-IF-CONTENT>
79+
</CORE-CONTENT>
80+
</REQ-IF>
81+
""" # noqa: E501
82+
83+
expected_stripped_xhtml = """\
84+
<div>
85+
<object data="files/rmf-1b18ef37-ca1f-4e79-954d-747df083f861_DOCATTACHMENT_3-screenshot-20220926-051927.png" name="DOCATTACHMENT_3-screenshot-20220926-051927.png" type="image/png"></object>
86+
</div>\
87+
""" # noqa: E501
88+
89+
reqif_bundle = ReqIFParser.parse_from_string(input_reqif)
90+
spec_object = reqif_bundle.get_spec_object_by_ref(
91+
"TEST_SPEC_OBJECT_IDENTIFIER"
92+
)
93+
94+
assert (
95+
spec_object.attribute_map[
96+
"_gFhrXWojEeuExICsU7Acmg"
97+
].value_stripped_xhtml
98+
== expected_stripped_xhtml
99+
)
100+
101+
102+
def test_03_edge_case_object_tag_xhtml_ns():
103+
input_reqif = """\
104+
<?xml version="1.0" encoding="UTF-8"?>
105+
<REQ-IF xmlns="http://www.omg.org/spec/ReqIF/20110401/reqif.xsd" xmlns:configuration="http://eclipse.org/rmf/pror/toolextensions/1.0" xmlns:xhtml="http://www.w3.org/1999/xhtml">
106+
<CORE-CONTENT>
107+
<REQ-IF-CONTENT>
108+
<SPEC-OBJECTS>
109+
<SPEC-OBJECT IDENTIFIER="TEST_SPEC_OBJECT_IDENTIFIER" LAST-CHANGE="2021-10-15T11:34:36.007+02:00">
110+
<VALUES>
111+
<ATTRIBUTE-VALUE-XHTML>
112+
<DEFINITION>
113+
<ATTRIBUTE-DEFINITION-XHTML-REF>_gFhrXWojEeuExICsU7Acmg</ATTRIBUTE-DEFINITION-XHTML-REF>
114+
</DEFINITION>
115+
<THE-VALUE>
116+
<xhtml:div>
117+
<xhtml:object data="files/rmf-1b18ef37-ca1f-4e79-954d-747df083f861_DOCATTACHMENT_3-screenshot-20220926-051927.png" name="DOCATTACHMENT_3-screenshot-20220926-051927.png" type="image/png"/>
118+
</xhtml:div>
119+
</THE-VALUE>
120+
</ATTRIBUTE-VALUE-XHTML>
121+
</VALUES>
122+
<TYPE>
123+
<SPEC-OBJECT-TYPE-REF>TEST_SPEC_OBJECT_TYPE_IDENTIFIER_FUNCTIONAL</SPEC-OBJECT-TYPE-REF>
124+
</TYPE>
125+
</SPEC-OBJECT>
126+
</SPEC-OBJECTS>
127+
</REQ-IF-CONTENT>
128+
</CORE-CONTENT>
129+
</REQ-IF>
130+
""" # noqa: E501
131+
132+
expected_stripped_xhtml = """\
133+
<div>
134+
<object data="files/rmf-1b18ef37-ca1f-4e79-954d-747df083f861_DOCATTACHMENT_3-screenshot-20220926-051927.png" name="DOCATTACHMENT_3-screenshot-20220926-051927.png" type="image/png"></object>
135+
</div>\
136+
""" # noqa: E501
137+
138+
reqif_bundle = ReqIFParser.parse_from_string(input_reqif)
139+
spec_object = reqif_bundle.get_spec_object_by_ref(
140+
"TEST_SPEC_OBJECT_IDENTIFIER"
141+
)
142+
143+
assert (
144+
spec_object.attribute_map[
145+
"_gFhrXWojEeuExICsU7Acmg"
146+
].value_stripped_xhtml
147+
== expected_stripped_xhtml
148+
)

0 commit comments

Comments
 (0)