Skip to content

Commit d348cf9

Browse files
authored
Merge pull request #112 from strictdoc-project/stanislaw/develop
helpers: lxml: lxml_convert_to/from_reqif_ns_xhtml_string() helper
2 parents 272477f + 1825277 commit d348cf9

10 files changed

Lines changed: 105 additions & 18 deletions

reqif/helpers/lxml.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
from copy import deepcopy
12
from itertools import chain
23

34
from lxml import etree
45
from lxml.etree import tostring
6+
from lxml.html import fragment_fromstring
57

68

79
def dump_xml_node(node):
@@ -41,10 +43,15 @@ def my_escape_title(string: str) -> str:
4143
# when the etree.tostring(...) method is used:
4244
# <reqif-xhtml:div xmlns:reqif-xhtml="http://www.w3.org/1999/xhtml">--</reqif-xhtml:div> # noqa: E501
4345
# FIXME: Would be great to find a better solution for this.
44-
def stringify_namespaced_children(node) -> str:
46+
def stringify_namespaced_children(node, namespace_tag=None) -> str:
47+
if namespace_tag is None:
48+
nskey = next(iter(node.nsmap.keys()))
49+
else:
50+
nskey = namespace_tag
51+
4552
def _stringify_reqif_ns_node(node):
4653
assert node is not None
47-
nskey = next(iter(node.nsmap.keys()))
54+
4855
output = ""
4956
node_no_ns_tag = etree.QName(node).localname
5057
output += f"<{nskey}:{node_no_ns_tag}"
@@ -90,10 +97,37 @@ def stringify_children(node):
9097
)
9198

9299

100+
def lxml_convert_to_reqif_ns_xhtml_string(string, reqif_xhtml=True) -> str:
    """
    Convert a plain (X)HTML fragment to a string with namespaced tags.

    The namespace prefix passed on to stringify_namespaced_children() is
    "reqif-xhtml" when reqif_xhtml is true and "xhtml" otherwise.
    """
    if reqif_xhtml:
        prefix = "reqif-xhtml"
    else:
        prefix = "xhtml"

    # The fragment is parsed under an artificial parent element so that
    # the input does not have to consist of exactly one root element.
    wrapper = fragment_fromstring(string, create_parent="NOT-USED")
    return stringify_namespaced_children(wrapper, namespace_tag=prefix)
104+
105+
106+
def lxml_convert_from_reqif_ns_xhtml_string(lxml_node) -> str:
    """
    Serialize an lxml node to a string with its namespaces stripped.

    The work is done on a deep copy, so the node passed in by the caller
    is not modified.
    """
    stripped_copy = deepcopy(lxml_node)
    lxml_strip_namespace_from_xml(stripped_copy, full=True)

    serialized = tostring(stripped_copy, encoding=str, pretty_print=True)
    # Drop the trailing whitespace left at the end of the serialized text.
    return serialized.rstrip()
112+
113+
93114
def is_self_closed_tag(xml):
    """
    Tell whether an element can be written as a self-closed tag.

    The tag cannot be self-closed if the element has children or if its
    text is not None (an empty string still counts as text).

    The number of children is taken with len(xml) rather than the
    deprecated getchildren() method, so the check works with both lxml
    elements and the standard library's ElementTree elements.
    """
    return len(xml) == 0 and xml.text is None
121+
122+
123+
def lxml_strip_namespace_from_xml(root_xml, full=False):
    """
    Strip the namespace URI from the tag names of a tree, in place.

    By default, elements whose tag contains the XHTML namespace URI are
    left untouched, which keeps the namespaces in the HTML content as
    found in the <ATTRIBUTE-VALUE-XHTML> of ReqIF XML. With full=True
    these elements are stripped as well.

    Returns the same root_xml object that was passed in.
    """
    # iter() replaces the deprecated getiterator().
    for elem in root_xml.iter():
        # Comments and processing instructions carry a callable instead of
        # a string as their tag. They have no namespace to strip, and both
        # the substring check and etree.QName() below would raise on them.
        if not isinstance(elem.tag, str):
            continue
        if not full and "http://www.w3.org/1999/xhtml" in elem.tag:
            continue
        elem.tag = etree.QName(elem).localname
    # Remove unused namespace declarations.
    etree.cleanup_namespaces(root_xml)
    return root_xml

reqif/helpers/string/__init__.py

Whitespace-only changes.

reqif/parser.py

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from lxml import etree
77
from lxml.etree import DocInfo
88

9+
from reqif.helpers.lxml import lxml_strip_namespace_from_xml
910
from reqif.models.error_handling import (
1011
ReqIFMissingTagException,
1112
ReqIFSchemaError,
@@ -100,7 +101,7 @@ def _parse_reqif(xml_reqif) -> ReqIFBundle:
100101
schema_namespace = namespace_info.get("xsi")
101102

102103
xml_reqif_nons = (
103-
ReqIFParser._strip_namespace_from_xml(copy.deepcopy(xml_reqif))
104+
lxml_strip_namespace_from_xml(copy.deepcopy(xml_reqif))
104105
if namespace is not None
105106
else xml_reqif
106107
)
@@ -317,16 +318,3 @@ def _parse_reqif_content(
317318
spec_relation_groups=spec_relation_groups,
318319
)
319320
return reqif_content, lookup, exceptions
320-
321-
@staticmethod
322-
def _strip_namespace_from_xml(root_xml):
323-
for elem in root_xml.getiterator():
324-
# Remove an XML namespace URI in the element's name but keep the
325-
# namespaces in the HTML content as found in the
326-
# <ATTRIBUTE-VALUE-XHTML> of ReqIF XML.
327-
if "http://www.w3.org/1999/xhtml" in elem.tag:
328-
continue
329-
elem.tag = etree.QName(elem).localname
330-
# Remove unused namespace declarations
331-
etree.cleanup_namespaces(root_xml)
332-
return root_xml

tests/unit/reqif/helpers/lxml/__init__.py

Whitespace-only changes.
File renamed without changes.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from lxml.etree import fromstring
2+
3+
from reqif.helpers.lxml import (
4+
lxml_convert_from_reqif_ns_xhtml_string,
5+
)
6+
7+
8+
def test__01__() -> None:
    # A namespaced XHTML fragment must be returned with all namespace
    # prefixes and declarations removed.
    namespaced_xhtml = (
        '<xhtml:div xmlns:xhtml="http://www.w3.org/1999/xhtml">'
        "Some<xhtml:span>combination</xhtml:span>"
        "of<xhtml:b>tags</xhtml:b>"
        "</xhtml:div>"
    )
    plain_xhtml = "<div>Some<span>combination</span>of<b>tags</b></div>"

    parsed_node = fromstring(namespaced_xhtml)
    actual_xhtml = lxml_convert_from_reqif_ns_xhtml_string(
        lxml_node=parsed_node
    )

    assert actual_xhtml == plain_xhtml
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from reqif.helpers.lxml import (
2+
lxml_convert_to_reqif_ns_xhtml_string,
3+
)
4+
5+
6+
def test__reqif_convert_to_ns_xhtml_string__1__xhtml_ns() -> None:
    # With reqif_xhtml=False every tag must receive the "xhtml:" prefix.
    plain_xhtml = "<div>Some<span>combination</span>of<b>tags</b></div>"
    expected_ns_xhtml = (
        "<xhtml:div>"
        "Some<xhtml:span>combination</xhtml:span>"
        "of<xhtml:b>tags</xhtml:b>"
        "</xhtml:div>"
    )

    actual_ns_xhtml = lxml_convert_to_reqif_ns_xhtml_string(
        plain_xhtml, reqif_xhtml=False
    )

    assert actual_ns_xhtml == expected_ns_xhtml
19+
20+
21+
def test__reqif_convert_to_ns_xhtml_string__2__reqif_xhtml_ns() -> None:
    # With reqif_xhtml=True every tag must receive the "reqif-xhtml:"
    # prefix.
    plain_xhtml = "<div>Some<span>combination</span>of<b>tags</b></div>"
    expected_ns_xhtml = (
        "<reqif-xhtml:div>"
        "Some<reqif-xhtml:span>combination</reqif-xhtml:span>"
        "of<reqif-xhtml:b>tags</reqif-xhtml:b>"
        "</reqif-xhtml:div>"
    )

    actual_ns_xhtml = lxml_convert_to_reqif_ns_xhtml_string(
        plain_xhtml, reqif_xhtml=True
    )

    assert actual_ns_xhtml == expected_ns_xhtml
34+
35+
36+
def test__reqif_convert_to_ns_xhtml_string__3__string_without_tags() -> None:
    # Text that contains no tags at all must pass through unchanged.
    plain_text = "Some combination of words. Multi string."
    expected_text = "Some combination of words. Multi string."

    actual_text = lxml_convert_to_reqif_ns_xhtml_string(
        plain_text, reqif_xhtml=False
    )

    assert actual_text == expected_text

tests/unit/reqif/helpers/test_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from reqif.helpers.string import (
1+
from reqif.helpers.string.xhtml_indent import (
22
reqif_indent_xhtml_string,
33
reqif_unindent_xhtml_string,
44
)

tests/unit/reqif/parsers/test_spec_object_parser_xhtml_indentation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from reqif.helpers.string import reqif_indent_xhtml_string
1+
from reqif.helpers.string.xhtml_indent import reqif_indent_xhtml_string
22
from reqif.parser import ReqIFParser
33
from reqif.unparser import ReqIFUnparser
44

0 commit comments

Comments
 (0)