Skip to content

Commit a123422

Browse files
authored
Merge pull request #206 from strictdoc-project/Stanislaw/tinymce_comments
fix(lxml): improve lxml_stringify_node() to handle <!-- comments -->
2 parents 13b578e + dbd387c commit a123422

4 files changed

Lines changed: 145 additions & 14 deletions

File tree

.gitignore

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
*.idea
1+
/.idea/
2+
/.venv/
3+
/dist/
4+
25
**/build/
36
*.egg-info/
47
*.pyc
58
*.script
6-
*.venv
7-
dist
8-
output.zip
9-
*.sketch
109
.DS_Store
1110
setup.py
1211
.vscode

reqif/helpers/lxml.py

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
from itertools import chain
44

55
from lxml import etree
6-
from lxml.etree import _Comment, tostring
6+
from lxml.etree import Comment, tostring
77
from lxml.html import fragment_fromstring
88

99

1010
def lxml_dump_node(node):
11-
return lxml_stringify_node(node)
11+
return lxml_stringify_node(node, root_node=False)
1212

1313

1414
# This code is taken from Python 3.7. The addition is escaping of the tab
@@ -25,6 +25,8 @@ def lxml_escape_for_html(string: str) -> str:
2525
string = string.replace(">", "&gt;")
2626
string = string.replace('"', "&quot;")
2727
string = string.replace("'", "&#x27;")
28+
# Non-breaking space character.
29+
string = string.replace("\xa0", "&#xA0;")
2830
# Invisible tab character
2931
string = string.replace("\t", "&#9;")
3032
return string
@@ -120,16 +122,53 @@ def _lxml_stringify_reqif_ns_node(node):
120122
return string
121123

122124

123-
def lxml_stringify_node(node):
125+
def lxml_stringify_node(node, root_node=True):
126+
"""
127+
Stringify a given LXML node.
128+
129+
:param node: The LXML node to stringify.
130+
:param root_node: Needed to track whether a node is the first among the
131+
nodes being stringified. Some ReqIF producers do not use a
132+
global xmlns="http://www.w3.org/1999/xhtml" namespace.
133+
Instead, they assign this namespace only to the very first
134+
node inside the ATTRIBUTE-VALUE-XHTML/THE-VALUE tag, for
135+
example: <div xmlns="http://www.w3.org/1999/xhtml">...
136+
Tracking this root node ensures that the xmlns attribute
137+
is assigned only to the first node and not to all
138+
subsequent nodes.
139+
:return: The node serialized as a string.
140+
"""
141+
output = ""
142+
143+
# Some ReqIF producers add <!-- --> comments but these comment nodes
144+
# cannot be handled using etree.QName(node).localname as is done further
145+
# below. They are handled separately in this dedicated branch.
146+
# A user report that helped to discover this case:
147+
# https://github.com/strictdoc-project/reqif/issues/205
148+
if lxml_is_comment_node(node):
149+
assert node.text is not None
150+
output = f"<!--{node.text}-->"
151+
if node.tail is not None:
152+
output += lxml_escape_for_html(node.tail)
153+
return output
154+
124155
nskey = None
156+
nsvalue = None
125157
if len(node.nsmap) > 0:
126-
nskey = next(iter(node.nsmap.keys()))
127-
output = ""
158+
nskey, nsvalue = next(iter(node.nsmap.items()))
159+
128160
node_no_ns_tag = etree.QName(node).localname
129-
tag = f"{nskey}:{node_no_ns_tag}" if node.tag[0] == "{" else node.tag
161+
tag = (
162+
f"{nskey}:{node_no_ns_tag}"
163+
if node.tag[0] == "{" and nskey is not None
164+
else node_no_ns_tag
165+
)
130166
output += f"<{tag}"
131167
for attribute, attribute_value in node.attrib.items():
132168
output += f' {attribute}="{lxml_escape_for_html(attribute_value)}"'
169+
if nsvalue is not None and root_node:
170+
output += f' xmlns="{nsvalue}"'
171+
133172
# <object> is surprisingly a tag that must have a closing tag even if it
134173
# is empty. If self-closed, it breaks all the following markup.
135174
if (
@@ -141,7 +180,7 @@ def lxml_stringify_node(node):
141180
if node.text is not None:
142181
output += lxml_escape_for_html(node.text)
143182
for child in node.getchildren():
144-
output += lxml_stringify_node(child)
183+
output += lxml_stringify_node(child, root_node=False)
145184
output += f"</{tag}>"
146185
else:
147186
output += "/>"
@@ -221,5 +260,4 @@ def lxml_strip_namespace_from_xml(root_xml, full=False):
221260

222261

223262
def lxml_is_comment_node(xml_node):
224-
# FIXME: Accessing a "_"-marked Comment class of lxml is not great.
225-
return isinstance(xml_node, _Comment)
263+
return xml_node.tag is Comment
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<REQ-IF xmlns="http://www.omg.org/spec/ReqIF/20110401/reqif.xsd">
3+
<THE-HEADER>
4+
<REQ-IF-HEADER IDENTIFIER="header_id">
5+
<CREATION-TIME>2023-01-01T00:00:00.000Z</CREATION-TIME>
6+
<REQ-IF-TOOL-ID>Test Tool</REQ-IF-TOOL-ID>
7+
<REQ-IF-VERSION>1.0</REQ-IF-VERSION>
8+
<SOURCE-TOOL-ID>Test</SOURCE-TOOL-ID>
9+
<TITLE>Minimal ReqIF</TITLE>
10+
</REQ-IF-HEADER>
11+
</THE-HEADER>
12+
<CORE-CONTENT>
13+
<REQ-IF-CONTENT>
14+
<DATATYPES>
15+
<DATATYPE-DEFINITION-STRING IDENTIFIER="DT_String" LONG-NAME="String" MAX-LENGTH="1000">
16+
</DATATYPE-DEFINITION-STRING>
17+
</DATATYPES>
18+
<SPEC-TYPES>
19+
<SPEC-OBJECT-TYPE IDENTIFIER="SOT_Req" LONG-NAME="Requirement">
20+
<SPEC-ATTRIBUTES>
21+
<ATTRIBUTE-DEFINITION-STRING IDENTIFIER="AD_ForeignID" LONG-NAME="ReqIF.ForeignID">
22+
<TYPE>
23+
<DATATYPE-DEFINITION-STRING-REF>DT_String</DATATYPE-DEFINITION-STRING-REF>
24+
</TYPE>
25+
</ATTRIBUTE-DEFINITION-STRING>
26+
</SPEC-ATTRIBUTES>
27+
</SPEC-OBJECT-TYPE>
28+
<SPECIFICATION-TYPE IDENTIFIER="ST_Spec" LAST-CHANGE="2023-01-01T00:00:00.000Z" LONG-NAME="Specification">
29+
<SPEC-ATTRIBUTES>
30+
<ATTRIBUTE-DEFINITION-STRING IDENTIFIER="AD_SpecName" LONG-NAME="Name">
31+
<TYPE>
32+
<DATATYPE-DEFINITION-STRING-REF>DT_String</DATATYPE-DEFINITION-STRING-REF>
33+
</TYPE>
34+
</ATTRIBUTE-DEFINITION-STRING>
35+
</SPEC-ATTRIBUTES>
36+
</SPECIFICATION-TYPE>
37+
</SPEC-TYPES>
38+
<SPEC-OBJECTS>
39+
<SPEC-OBJECT IDENTIFIER="_73d9cdc2-1c71-4d27-92ab-1df19c9f6ac3" LAST-CHANGE="2025-10-15T05:15:32.902Z">
40+
<TYPE>
41+
<SPEC-OBJECT-TYPE-REF>_6cf52001-6b78-428a-bfcd-429ba136d53a</SPEC-OBJECT-TYPE-REF>
42+
</TYPE>
43+
<VALUES>
44+
<ATTRIBUTE-VALUE-XHTML>
45+
<DEFINITION>
46+
<ATTRIBUTE-DEFINITION-XHTML-REF>_0cad5cb5-6b09-4b60-bb66-fed63d56e806</ATTRIBUTE-DEFINITION-XHTML-REF>
47+
</DEFINITION>
48+
<THE-VALUE><div xmlns="http://www.w3.org/1999/xhtml">
49+
<p><!-- x-tinymce/html -->The&#xA0;###<!-- x-tinymce/html -->Platform shall&#xA0; provide cover art within 2s of audio select</p>
50+
</div></THE-VALUE>
51+
</ATTRIBUTE-VALUE-XHTML>
52+
<ATTRIBUTE-VALUE-STRING THE-VALUE="1672600">
53+
<DEFINITION>
54+
<ATTRIBUTE-DEFINITION-STRING-REF>_d0bf9a76-226d-4856-9387-e64040705955</ATTRIBUTE-DEFINITION-STRING-REF>
55+
</DEFINITION>
56+
</ATTRIBUTE-VALUE-STRING>
57+
<ATTRIBUTE-VALUE-ENUMERATION>
58+
<DEFINITION>
59+
<ATTRIBUTE-DEFINITION-ENUMERATION-REF>_4d6b67beb8b9fa04468c6c2d5ae99f484d02c5d8</ATTRIBUTE-DEFINITION-ENUMERATION-REF>
60+
</DEFINITION>
61+
<VALUES>
62+
<ENUM-VALUE-REF>_009812fe2a185feb11f17bd36dcd508a736fe8c8</ENUM-VALUE-REF>
63+
</VALUES>
64+
</ATTRIBUTE-VALUE-ENUMERATION>
65+
</VALUES>
66+
</SPEC-OBJECT>
67+
</SPEC-OBJECTS>
68+
<SPECIFICATIONS>
69+
<SPECIFICATION IDENTIFIER="SPEC_Main" LONG-NAME="Main Specification">
70+
<VALUES>
71+
<ATTRIBUTE-VALUE-STRING THE-VALUE="Test Specification">
72+
<DEFINITION>
73+
<ATTRIBUTE-DEFINITION-STRING-REF>AD_SpecName</ATTRIBUTE-DEFINITION-STRING-REF>
74+
</DEFINITION>
75+
</ATTRIBUTE-VALUE-STRING>
76+
</VALUES>
77+
<TYPE>
78+
<SPECIFICATION-TYPE-REF>ST_Spec</SPECIFICATION-TYPE-REF>
79+
</TYPE>
80+
<CHILDREN>
81+
<SPEC-HIERARCHY IDENTIFIER="SH_001" LONG-NAME="Requirement Hierarchy">
82+
<OBJECT>
83+
<SPEC-OBJECT-REF>_73d9cdc2-1c71-4d27-92ab-1df19c9f6ac3</SPEC-OBJECT-REF>
84+
</OBJECT>
85+
</SPEC-HIERARCHY>
86+
</CHILDREN>
87+
</SPECIFICATION>
88+
</SPECIFICATIONS>
89+
</REQ-IF-CONTENT>
90+
</CORE-CONTENT>
91+
</REQ-IF>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
RUN: mkdir -p %S/output
2+
RUN: %reqif passthrough %S/sample.reqif %S/output/sample.reqif
3+
RUN: %diff %S/sample.reqif %S/output/sample.reqif

0 commit comments

Comments
 (0)