Skip to content

Commit c9b8553

Browse files
committed
Add HSD parser and builder for dict-based representation
1 parent cf02c39 commit c9b8553

11 files changed

Lines changed: 741 additions & 64 deletions

File tree

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2020 Bálint Aradi, Universität Bremen
1+
Copyright (c) 2011-2020 DFTB+ developers group
22

33
All rights reserved.
44

README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
HSD — Human-friendly Structured Data
33
************************************
44

5-
This Python package contains utilities to write (and soon also to read) files in
5+
This Python package contains utilities to read and write files in
66
the Human-friendly Structured Data (HSD) format.
77

88
It is licensed under the *BSD 2-clause license*.

src/hsd/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#------------------------------------------------------------------------------#
2+
# hsd: package for manipulating HSD-formatted data #
3+
# Copyright (C) 2011 - 2020 DFTB+ developers group #
4+
# #
5+
# See the LICENSE file for terms of usage and distribution. #
6+
#------------------------------------------------------------------------------#
7+
#
8+
"""
9+
Central module for the hsd package
10+
"""
11+
from .dump import dump, dumps
12+
from .parser import HsdParser
13+
from .dictbuilder import HsdDictBuilder

src/hsd/common.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#------------------------------------------------------------------------------#
2+
# hsd: package for manipulating HSD-formatted data #
3+
# Copyright (C) 2011 - 2020 DFTB+ developers group #
4+
# #
5+
# See the LICENSE file for terms of usage and distribution. #
6+
#------------------------------------------------------------------------------#
7+
#
8+
"""
9+
Implements common functionalities for the HSD package
10+
"""
11+
12+
13+
class HsdException(Exception):
    """Root of the exception hierarchy of the HSD package.

    It adds nothing to the built-in ``Exception``; it only serves as a common
    ancestor, so that errors of this package can be caught with one clause.
    """
16+
17+
18+
class HsdQueryError(HsdException):
    """Base class for errors detected by the HsdQuery object.

    Attributes:
        tag: Name of the tag with the error (or empty string).
        file: Name of the file where the error occurred (or empty string).
        line: Line where the error occurred (or -1).
    """

    def __init__(self, msg="", node=None):
        """Initializes the exception.

        Args:
            msg: Error message
            node: HSD element where the error occurred (optional). When given,
                it must offer a ``tag`` attribute and a
                ``gethsd(name, default)`` method, as both are used here to
                obtain the location of the error.
        """
        super().__init__(msg)
        if node is None:
            self.tag = ""
            self.file = ""
            self.line = -1
        else:
            self.tag = node.gethsd(HSDATTR_TAG, node.tag)
            # The fallback values follow the contract stated in the class
            # docstring: an empty string for an unknown file and -1 for an
            # unknown line (formerly -1 and None were used, respectively).
            self.file = node.gethsd(HSDATTR_FILE, "")
            self.line = node.gethsd(HSDATTR_LINE, -1)
44+
45+
46+
class HsdParserError(HsdException):
    """Common ancestor of the parser related errors of the HSD package."""
49+
50+
51+
def unquote(txt):
    """Returns the text with one pair of enclosing quotes removed.

    The text is only altered if it is at least two characters long and it
    starts and ends with the same quote character (single or double quote).
    In every other case it is returned unchanged.
    """
    if len(txt) < 2:
        return txt
    opening = txt[0]
    if opening in "\"'" and txt[-1] == opening:
        return txt[1:-1]
    return txt
56+
57+
58+
# Name for default attribute (when attribute name is not specified)
DEFAULT_ATTRIBUTE = "attribute"


# Names of the HSD-specific node attributes. The tag, file and line entries
# are the ones HsdQueryError looks up via gethsd() on the node it receives.
HSDATTR_TAG = "tag"
HSDATTR_FILE = "file"
HSDATTR_LINE = "line"
HSDATTR_EQUAL = "equal"
HSDATTR_PROC = "processed"

src/hsd/dictbuilder.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#------------------------------------------------------------------------------#
2+
# hsd: package for manipulating HSD-formatted data #
3+
# Copyright (C) 2011 - 2020 DFTB+ developers group #
4+
# #
5+
# See the LICENSE file for terms of usage and distribution. #
6+
#------------------------------------------------------------------------------#
7+
#
8+
"""
9+
Contains an event-driven builder for dictionary based (JSON-like) structure
10+
"""
11+
import re
12+
from .parser import HsdEventHandler
13+
14+
__all__ = ['HsdDictBuilder']
15+
16+
17+
# Tokenizer for the text content of an HSD node. Every match carries exactly
# one token in one of the following named groups:
#   int     -- optionally signed integer
#   float   -- decimal number with optional exponent (1.5, 1., .5, 1e-3)
#   logical -- "yes" or "no" in any capitalisation
#   qstr    -- text enclosed in matching single or double quotes (the quotes
#              are part of the group)
#   str     -- any other run of non-whitespace characters
# Each alternative skips the whitespace in front of its token. This matters
# for the last one: the float and logical alternatives only look ahead at the
# separator instead of consuming it, so without the leading \s* a following
# string token would start with the separating blanks and a following quoted
# string would not be recognised as such.
_TOKEN_PATTERN = re.compile(r"""
    (?:\s*(?:^|(?<=\s))(?P<int>[+-]?[0-9]+)(?:\s*$|\s+))
    |
    (?:\s*(?:^|(?<=\s))
       (?P<float>[-+]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)
       (?:$|(?=\s+)))
    |
    (?:\s*(?:^|(?<=\s))(?P<logical>[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+)))
    |
    (?:\s*(?:(?P<qstr>(?P<quote>['"]).*?(?P=quote))|(?P<str>\S+)))
    """, re.VERBOSE | re.MULTILINE)
27+
28+
29+
class HsdDictBuilder(HsdEventHandler):
    """Event handler assembling HSD content into nested dictionaries.

    Every closed tag becomes a key in the dictionary of the enclosing block.
    Its value is the converted text of the tag if text was added, otherwise
    the dictionary which collected the children of the tag. A tag occurring
    repeatedly within a block is stored as a list with one item for each
    occurrence. Tag options are stored in the enclosing block under the key
    "<tagname>.<optionname>".

    Args:
        flatten_data: If true, the tokens of a multi-line text are gathered
            in one flat list instead of one list per line.
    """

    # Token groups of _TOKEN_PATTERN in the order they are probed, each with
    # the function turning the matched text into the stored Python value.
    # Quoted strings are stored as matched, i.e. including their quotes.
    _CONVERTERS = (
        ("int", int),
        ("float", float),
        ("logical", lambda token: token.lower() == "yes"),
        ("str", lambda token: token),
        ("qstr", lambda token: token),
    )

    def __init__(self, flatten_data=False):
        HsdEventHandler.__init__(self)
        self._flatten_data = flatten_data
        self._data = None
        self._parentblocks = []
        self._hsddict = {}
        self._curblock = self._hsddict


    def open_tag(self, tagname, options, hsdoptions):
        """Starts a new block (hsdoptions is not used by this builder)."""
        # Options are attached to the enclosing block, next to the tag itself
        self._curblock.update(
            (tagname + '.' + optname, optvalue)
            for optname, optvalue in options.items())
        self._parentblocks.append(self._curblock)
        self._curblock = {}


    def close_tag(self, tagname):
        """Stores the finished block (or its text data) in its parent."""
        parent = self._parentblocks.pop()
        # Text data takes precedence over the child dictionary of the tag
        payload, self._data = self._data, None
        if payload is None:
            payload = self._curblock
        existing = parent.get(tagname)
        if existing is None:
            parent[tagname] = payload
        elif isinstance(existing, list):
            existing.append(payload)
        else:
            # Second occurrence of the tag: turn the entry into a list
            parent[tagname] = [existing, payload]
        self._curblock = parent


    def add_text(self, text):
        """Converts the text and keeps it as the data of the open tag."""
        self._data = self._text_to_data(text)


    @property
    def hsddict(self):
        """Returns the dictionary which has been built"""
        return self._hsddict


    def _text_to_data(self, txt):
        """Converts text into Python values, line by line and token by token.

        A single token is returned as a scalar, a single line as a list of
        its tokens, several lines as a list of such lists (or as one flat
        list, if flattening was requested).
        """
        rows = []
        for line in txt.split("\n"):
            # With flattening all tokens are appended to the result directly
            target = rows if self._flatten_data else []
            for match in _TOKEN_PATTERN.finditer(line.strip()):
                for group, convert in self._CONVERTERS:
                    token = match.group(group)
                    if token:
                        target.append(convert(token))
                        break
            if not self._flatten_data:
                rows.append(target)
        if len(rows) != 1:
            return rows
        only = rows[0]
        if isinstance(only, list) and len(only) == 1:
            return only[0]
        return only

src/hsd.py renamed to src/hsd/dump.py

Lines changed: 16 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
#!/usr/bin/env python3
21
#------------------------------------------------------------------------------#
32
# hsd: package for manipulating HSD-formatted data #
4-
# Copyright (C) 2020 Bálint Aradi, Universität Bremen #
3+
# Copyright (C) 2011 - 2020 DFTB+ developers group #
54
# #
65
# See the LICENSE file for terms of usage and distribution. #
76
#------------------------------------------------------------------------------#
87
#
98
"""
10-
Provides functionality to convert Python structures to HSD
9+
Provides functionality to dump Python structures to HSD
1110
"""
1211
import io
1312
import numpy as np
13+
from .common import DEFAULT_ATTRIBUTE
1414

1515
__all__ = ['dump', 'dumps']
1616

@@ -20,8 +20,11 @@
2020
# String quoting delimiters (must be at least two)
2121
_QUOTING_CHARS = "\"'"
2222

23-
# Suffix for appending attributes
24-
_ATTRIBUTE_SUFFIX = ".attribute"
23+
# Special characters
24+
_SPECIAL_CHARS = "{}[]= "
25+
26+
27+
_ATTRIBUTE_SUFFIX = "." + DEFAULT_ATTRIBUTE
2528

2629

2730
def dump(obj, fobj):
@@ -130,63 +133,14 @@ def _item_to_hsd(item):
130133

131134

132135
def _str_to_hsd(string):
    """Returns the HSD representation of a string, quoted where necessary.

    A string without quote characters is wrapped into the first quoting
    character if it contains any of the special characters. A string
    containing one kind of quote character is wrapped into the other kind,
    unless it starts with a quote character already, in which case it is
    passed through unchanged. Strings containing both kinds of quote
    characters can not be represented.

    Raises:
        ValueError: If the string contains both kinds of quote characters.
    """
    has_quote = [quote in string for quote in _QUOTING_CHARS]
    nkinds = sum(has_quote)
    if nkinds > 1:
        msg = "String '{}' can not be quoted correctly".format(string)
        raise ValueError(msg)
    wrapper = ""
    if nkinds == 1:
        if string[0] not in _QUOTING_CHARS:
            # Wrap into the quote character which is not used in the string
            wrapper = _QUOTING_CHARS[1] if has_quote[0] else _QUOTING_CHARS[0]
    elif any(special in string for special in _SPECIAL_CHARS):
        wrapper = _QUOTING_CHARS[0]
    return wrapper + string + wrapper
139-
140-
141-
142-
if __name__ == "__main__":
143-
INPUT = {
144-
"Driver": {},
145-
"Hamiltonian": {
146-
"DFTB": {
147-
"Scc": True,
148-
"SccTolerance": 1e-10,
149-
"MaxSccIterations": 1000,
150-
"Mixer": {
151-
"Broyden": {}
152-
},
153-
"MaxAngularMomentum": {
154-
"O": "p",
155-
"H": "s"
156-
},
157-
"Filling": {
158-
"Fermi": {
159-
"Temperature": 1e-8,
160-
"Temperature.attribute": "Kelvin"
161-
}
162-
},
163-
"KPointsAndWeights": {
164-
"SupercellFolding": [[2, 0, 0], [0, 2, 0], [0, 0, 2],
165-
[0.5, 0.5, 0.5]]
166-
},
167-
"ElectricField": {
168-
"PointCharges": {
169-
"CoordsAndCharges": np.array(
170-
[[-0.94, -9.44, 1.2, 1.0],
171-
[-0.94, -9.44, 1.2, -1.0]])
172-
}
173-
},
174-
"SelectSomeAtoms": [1, 2, "3:-3"]
175-
}
176-
},
177-
"Analysis": {
178-
"ProjectStates": {
179-
"Region": [
180-
{
181-
"Atoms": [1, 2, 3],
182-
"Label": "region1",
183-
},
184-
{
185-
"Atoms": np.array([1, 2, 3]),
186-
"Label": "region2",
187-
}
188-
]
189-
}
190-
}
191-
}
192-
print(dumps(INPUT))

0 commit comments

Comments
 (0)