Skip to content

Commit 2110e63

Browse files
committed
Make handling of repeated nodes consistent with DFTB+
1 parent 4b2a89d commit 2110e63

8 files changed

Lines changed: 245 additions & 61 deletions

File tree

docs/hsd.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ nodes will be mapped to one key, which will contain a list of dictionaries
162162
]
163163
},
164164
]
165+
# Attributes also become a list. For technical reasons, the
166+
# dictbuilder always creates an attribute list for multiple nodes,
167+
# even if none of the nodes carries an actual attribute.
168+
"PointCharges.attrib": [None, None]
165169
}
166170
}
167171

@@ -186,7 +190,7 @@ to record following additional data for each HSD node:
186190

187191
If this information is being recorded, a special key with the
188192
``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON
189-
presentation. The correpsonding value will be a dictionary with those
193+
presentation. The corresponding value will be a dictionary with those
190194
information.
191195

192196
As an example, let's store the input from the previous section ::
@@ -199,7 +203,7 @@ As an example, let's store the input from the previous section ::
199203
}
200204

201205
in the file `test.hsd`, parse it and convert the node names to lower case
202-
(to make the input processing case-insensitive). Using the Python command ::
206+
(to enable case-insensitive input processing). Using the Python command ::
203207

204208
inpdict = hsd.load("test.hsd", lower_tag_names=True, include_hsd_attribs=True)
205209

@@ -208,13 +212,13 @@ will yield the following dictionary representation of the input::
208212
{
209213
'hamiltonian.hsdattrib': {'equal': True, 'line': 0, 'tag': 'Hamiltonian'},
210214
'hamiltonian': {
211-
'dftb.hsdattrib': {'line': 0, 'tag': 'Dftb'},
215+
'dftb.hsdattrib': {'line': 0, 'equal': False, 'tag': 'Dftb'},
212216
'dftb': {
213217
'scc.hsdattrib': {'equal': True, 'line': 1, 'tag': 'Scc'},
214218
'scc': True,
215219
'filling.hsdattrib': {'equal': True, 'line': 2, 'tag': 'Filling'},
216220
'filling': {
217-
'fermi.hsdattrib': {'line': 2, 'tag': 'Fermi'},
221+
'fermi.hsdattrib': {'line': 2, 'equal': False, 'tag': 'Fermi'},
218222
'fermi': {
219223
'temperature.attrib': 'Kelvin',
220224
'temperature.hsdattrib': {'equal': True, 'line': 3,

src/hsd/dict.py

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -80,27 +80,38 @@ def close_tag(self, tagname):
8080
attrib, hsdattrib = self._attribs.pop(-1)
8181
parentblock = self._parentblocks.pop(-1)
8282
prevcont = parentblock.get(tagname)
83-
if prevcont is not None:
84-
if isinstance(prevcont, dict) and self._data is None:
85-
prevcont = [prevcont]
86-
parentblock[tagname] = prevcont
87-
elif not (isinstance(prevcont, list)
88-
and isinstance(prevcont[0], dict)):
89-
msg = f"Invalid duplicate occurance of node '{tagname}'"
90-
raise HsdError(msg)
83+
if self._data is not None:
84+
if prevcont is None:
85+
parentblock[tagname] = self._data
86+
elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict):
87+
prevcont.append({None: self._data})
88+
elif isinstance(prevcont, dict):
89+
parentblock[tagname] = [prevcont, {None: self._data}]
90+
else:
91+
parentblock[tagname] = [{None: prevcont}, {None: self._data}]
92+
else:
93+
if prevcont is None:
94+
parentblock[tagname] = self._curblock
95+
elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict):
96+
prevcont.append(self._curblock)
97+
elif isinstance(prevcont, dict):
98+
parentblock[tagname] = [prevcont, self._curblock]
99+
else:
100+
parentblock[tagname] = [{None: prevcont}, self._curblock]
91101

92102
if prevcont is None:
93-
content = self._data if self._data is not None else self._curblock
94-
parentblock[tagname] = content
95103
if attrib:
96104
parentblock[tagname + ATTRIB_SUFFIX] = attrib
97105
if self._include_hsd_attribs:
98106
parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib
99107
else:
100-
prevcont.append(self._curblock)
101108
prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX)
102-
if not (prevattrib is None and attrib is None):
103-
msg = f"Duplicate node '{tagname}' should not carry attributes"
109+
if isinstance(prevattrib, list):
110+
prevattrib.append(attrib)
111+
else:
112+
parentblock[tagname + ATTRIB_SUFFIX] = [prevattrib, attrib]
113+
print(f"parentblock[{tagname} + {ATTRIB_SUFFIX}] = [{prevattrib}, {attrib}]")
114+
104115
if self._include_hsd_attribs:
105116
prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX)
106117
if isinstance(prevhsdattrib, list):
@@ -189,8 +200,12 @@ def walk(self, dictobj):
189200
elif isinstance(value, list) and value and isinstance(value[0], dict):
190201
for ind, item in enumerate(value):
191202
hsdattr = hsdattrib[ind] if hsdattrib else None
192-
self._eventhandler.open_tag(key, None, hsdattr)
193-
self.walk(item)
203+
attr = attrib[ind] if attrib else None
204+
self._eventhandler.open_tag(key, attr, hsdattr)
205+
if None in item:
206+
self._eventhandler.add_text(_to_text(item[None]))
207+
else:
208+
self.walk(item)
194209
self._eventhandler.close_tag(key)
195210

196211
else:

src/hsd/formatter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def add_text(self, text: str):
9797
equal = self._followed_by_equal[-1]
9898
multiline = "\n" in text
9999
if equal is None and not multiline:
100-
if len(self._followed_by_equal) > 2:
100+
if len(self._followed_by_equal) > 1:
101101
equal = not self._followed_by_equal[-2]
102102
else:
103103
equal = True

src/hsd/io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def load_string(
114114
with the recorded data:
115115
116116
>>> data["dftb.hsdattrib"]
117-
{'line': 1, 'name': 'Dftb'}
117+
{'equal': False, 'line': 1, 'name': 'Dftb'}
118118
119119
This additional data can be then also used to format the tags in the
120120
original style, when writing the data in HSD-format again. Compare:

src/hsd/parser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def _parse(self, line):
157157
# Opening tag by curly brace
158158
elif sign == "{":
159159
#self._has_child = True
160+
self._hsdattrib[common.HSD_ATTRIB_EQUAL] = False
160161
self._starttag(before, self._after_equal_sign)
161162
self._buffer = []
162163
self._after_equal_sign = False

test/test_dict.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#!/bin/env python3
2+
#------------------------------------------------------------------------------#
3+
# hsd-python: package for manipulating HSD-formatted data in Python #
4+
# Copyright (C) 2011 - 2021 DFTB+ developers group #
5+
# Licensed under the BSD 2-clause license. #
6+
#------------------------------------------------------------------------------#
7+
#
8+
"""Tests for the dictbuilder class"""
9+
10+
import io
11+
import pytest
12+
import hsd
13+
14+
_HSD_LINE = hsd.HSD_ATTRIB_LINE
15+
_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL
16+
_HSD_NAME = hsd.HSD_ATTRIB_NAME
17+
18+
# General test list format for valid tests
19+
# [("Test name", (HSD input string, expected dictionary outcome))]
20+
21+
# Tests without hsd attribute recording
22+
_TESTS_NO_HSDATTRIB = [
23+
(
24+
"Simple", (
25+
"Test {}",
26+
{"Test": {}},
27+
)
28+
),
29+
(
30+
"Data with quoted strings", (
31+
"O = SelectedShells { \"s\" \"p\" }",
32+
{"O": {"SelectedShells": ['"s"', '"p"']}},
33+
)
34+
),
35+
(
36+
"Attribute containing comma", (
37+
"PolarRadiusCharge [AA^3,AA,] = {\n1.030000 3.800000 2.820000\n}",
38+
{"PolarRadiusCharge": [1.03, 3.8, 2.82], "PolarRadiusCharge.attrib": "AA^3,AA,"},
39+
)
40+
),
41+
(
42+
"Duplicate node entry", (
43+
"a { b = 1 }\na { b = 2 }\n",
44+
{"a.attrib": [None, None], "a": [{"b": 1}, {"b": 2}]},
45+
)
46+
),
47+
(
48+
"Duplicate value entry", (
49+
"a = 1\na = 2",
50+
{"a.attrib": [None, None], "a": [{None: 1}, {None: 2}]},
51+
)
52+
),
53+
]
54+
_TESTS_NO_HSDATTRIB_NAMES, _TESTS_NO_HSDATTRIB_CASES = zip(*_TESTS_NO_HSDATTRIB)
55+
56+
57+
# Tests with HSD attribute recording
58+
# The input string should be formatted the same way as it comes out from the formatter since
59+
# these tests are also used to test backwards direction (dictionary -> string).
60+
_TESTS_HSDATTRIB = [
61+
(
62+
"Simple", (
63+
"Test {}\n",
64+
{"Test.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, "Test": {}}
65+
)
66+
),
67+
(
68+
"Data with quoted strings", (
69+
"O = SelectedShells {\n \"s\" \"p\"\n}\n",
70+
{
71+
"O.hsdattrib": {_HSD_EQUAL: True, _HSD_LINE: 0},
72+
"O": {
73+
"SelectedShells.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False},
74+
"SelectedShells": ['"s"', '"p"']
75+
}
76+
}
77+
)
78+
),
79+
(
80+
"Duplicate node", (
81+
"a {\n b = 1\n}\na {\n b = 2\n}\n",
82+
{
83+
"a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: False},
84+
{_HSD_LINE: 3, _HSD_EQUAL: False}],
85+
"a.attrib": [None, None],
86+
"a": [
87+
{"b.hsdattrib": {_HSD_LINE: 1, _HSD_EQUAL: True}, "b": 1},
88+
{"b.hsdattrib": {_HSD_LINE: 4, _HSD_EQUAL: True}, "b": 2}
89+
]
90+
},
91+
)
92+
),
93+
(
94+
"Duplicate value", (
95+
"a = 1\na = 2\n",
96+
{
97+
"a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}],
98+
"a.attrib": [None, None],
99+
"a": [{None: 1}, {None: 2}]
100+
},
101+
)
102+
),
103+
(
104+
"Triple value with attrib", (
105+
"a = 1\na = 2\na [someunit] {\n 3\n}\n",
106+
{
107+
"a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True},
108+
{_HSD_LINE: 2, _HSD_EQUAL: False}],
109+
"a.attrib": [None, None, "someunit"],
110+
"a": [{None: 1}, {None: 2}, {None: 3}]
111+
},
112+
)
113+
),
114+
115+
]
116+
_TESTS_HSDATTRIB_NAMES, _TESTS_HSDATTRIB_CASES = zip(*_TESTS_HSDATTRIB)
117+
118+
119+
# Tests with HSD attribute recording and tag name lowering switched on
120+
# The input string should be formatted the same way as it comes out from the formatter since
121+
# these tests are also used to test backwards direction (dictionary -> string).
122+
_TESTS_HSDATTRIB_LOWER = [
123+
(
124+
"Simple", (
125+
"Test {}\n",
126+
{"test.hsdattrib": {_HSD_NAME: "Test", _HSD_LINE: 0, _HSD_EQUAL: False}, "test": {}}
127+
)
128+
),
129+
]
130+
_TESTS_HSDATTRIB_LOWER_NAMES, _TESTS_HSDATTRIB_LOWER_CASES = zip(*_TESTS_HSDATTRIB_LOWER)
131+
132+
133+
@pytest.mark.parametrize(
134+
"hsdstr,hsddict",
135+
_TESTS_NO_HSDATTRIB_CASES,
136+
ids=_TESTS_NO_HSDATTRIB_NAMES
137+
)
138+
def test_builder_nohsdattr(hsdstr, hsddict):
139+
"""Test transformation from hsd to dictionary without HSD attributes."""
140+
dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=False)
141+
parser = hsd.HsdParser(eventhandler=dictbuilder)
142+
fobj = io.StringIO(hsdstr)
143+
parser.parse(fobj)
144+
assert dictbuilder.hsddict == hsddict
145+
146+
147+
@pytest.mark.parametrize(
148+
"hsdstr,hsddict",
149+
_TESTS_HSDATTRIB_CASES,
150+
ids=_TESTS_HSDATTRIB_NAMES
151+
)
152+
def test_builder_hsdattr(hsdstr, hsddict):
153+
"""Test transformation from hsd to dictionary with HSD attributes."""
154+
dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True)
155+
parser = hsd.HsdParser(eventhandler=dictbuilder)
156+
fobj = io.StringIO(hsdstr)
157+
parser.parse(fobj)
158+
assert dictbuilder.hsddict == hsddict
159+
160+
161+
@pytest.mark.parametrize(
162+
"hsdstr,hsddict",
163+
_TESTS_HSDATTRIB_LOWER_CASES,
164+
ids=_TESTS_HSDATTRIB_LOWER_NAMES
165+
)
166+
def test_builder_hsdattr_lower(hsdstr, hsddict):
167+
"""Test transformation from hsd to dictionary with HSD attributes and case lowering."""
168+
dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True)
169+
parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True)
170+
fobj = io.StringIO(hsdstr)
171+
parser.parse(fobj)
172+
assert dictbuilder.hsddict == hsddict
173+
174+
175+
@pytest.mark.parametrize(
176+
"hsdstr,hsddict",
177+
_TESTS_HSDATTRIB_CASES,
178+
ids=_TESTS_HSDATTRIB_NAMES
179+
)
180+
def test_walker_hsdattr(hsdstr, hsddict):
181+
"""Test transformation from dictionary to string using HSD attributes."""
182+
output = io.StringIO()
183+
formatter = hsd.HsdFormatter(output, use_hsd_attribs=True)
184+
dictwalker = hsd.HsdDictWalker(formatter)
185+
dictwalker.walk(hsddict)
186+
assert output.getvalue() == hsdstr
187+
188+
189+
@pytest.mark.parametrize(
190+
"hsdstr,hsddict",
191+
_TESTS_HSDATTRIB_LOWER_CASES,
192+
ids=_TESTS_HSDATTRIB_LOWER_NAMES
193+
)
194+
def test_walker_hsdattr_lower(hsdstr, hsddict):
195+
"""Test transformation from dictionary to string using HSD attributes."""
196+
output = io.StringIO()
197+
formatter = hsd.HsdFormatter(output, use_hsd_attribs=True)
198+
dictwalker = hsd.HsdDictWalker(formatter)
199+
dictwalker.walk(hsddict)
200+
assert output.getvalue() == hsdstr

test/test_dictbuilder.py

Lines changed: 0 additions & 37 deletions
This file was deleted.

0 commit comments

Comments
 (0)