1- #------------------------------------------------------------------------------#
21# hsd-python: package for manipulating HSD-formatted data in Python #
32# Copyright (C) 2011 - 2021 DFTB+ developers group #
43# Licensed under the BSD 2-clause license. #
2928_SPECIAL_CHARS = "{}[]= "
3029
3130
32- def load (hsdfile : Union [TextIO , str ]) -> dict :
31+ def load (hsdfile : Union [TextIO , str ], lower_tag_names : bool = False ,
32+ include_hsd_attribs : bool = False , flatten_data : bool = False ) -> dict :
3333 """Loads a file with HSD-formatted data into a Python dictionary
3434
3535 Args:
3636 hsdfile: Name of file or file like object to read the HSD data from
37+ lower_tag_names: When set, all tag names will be converted to lower-case
38+ (practical, when input should be treated case insensitive.) If
39+ ``include_hsd_attribs`` is set, the original tag name will be
40+ stored among the HSD attributes.
41+ include_hsd_attribs: Whether the HSD-attributes (processing related
42+ attributes, like original tag name, line information, etc.) should
43+ be stored.
44+ flatten_data: Whether multiline data in the HSD input should be
45+ flattened into a single list. Otherwise a list of lists is created,
46+ with one list for every line (default).
3747
3848 Returns:
3949 Dictionary representing the HSD data.
50+
51+ Examples:
52+ See :func:`hsd.load_string` for examples of usage.
4053 """
41- dictbuilder = HsdDictBuilder ()
42- parser = HsdParser (eventhandler = dictbuilder )
54+ dictbuilder = HsdDictBuilder (flatten_data = flatten_data ,
55+ include_hsd_attribs = include_hsd_attribs )
56+ parser = HsdParser (eventhandler = dictbuilder ,
57+ lower_tag_names = lower_tag_names )
4358 if isinstance (hsdfile , str ):
4459 with open (hsdfile , "r" ) as hsdfile :
4560 parser .feed (hsdfile )
@@ -48,11 +63,22 @@ def load(hsdfile: Union[TextIO, str]) -> dict:
4863 return dictbuilder .hsddict
4964
5065
51- def load_string (hsdstr : str ) -> dict :
66+ def load_string (hsdstr : str , lower_tag_names : bool = False ,
67+ include_hsd_attribs : bool = False , flatten_data : bool = False ) -> dict :
5268 """Loads a string with HSD-formatted data into a Python dictionary.
5369
5470 Args:
5571 hsdstr: String with HSD-formatted data.
72+ lower_tag_names: When set, all tag names will be converted to lower-case
73+ (practical, when input should be treated case insensitive.) If
74+ ``include_hsd_attribs`` is set, the original tag name will be
75+ stored among the HSD attributes.
76+ include_hsd_attribs: Whether the HSD-attributes (processing related
77+ attributes, like original tag name, line information, etc.) should
78+ be stored.
79+ flatten_data: Whether multiline data in the HSD input should be
80+ flattened into a single list. Otherwise a list of lists is created,
81+ with one list for every line (default).
5682
5783 Returns:
5884 Dictionary representing the HSD data.
@@ -70,36 +96,81 @@ def load_string(hsdstr: str) -> dict:
7096 ... \" \" \"
7197 >>> hsd.load_string(hsdstr)
7298 {'Dftb': {'Scc': True, 'Filling': {'Fermi': {'Temperature.attrib': 'Kelvin', 'Temperature': 100}}}}
99+
100+ In order to ease the case-insensitive handling of the input, the tag
101+ names can be converted to lower case during reading using the
102+ ``lower_tag_names`` option.
103+
104+ >>> hsd.load_string(hsdstr, lower_tag_names=True)
105+ {'dftb': {'scc': True, 'filling': {'fermi': {'temperature.attrib': 'Kelvin', 'temperature': 100}}}}
106+
107+ The original tag names (together with additional information like the
108+ line number of a tag) can be recorded, if the ``include_hsd_attribs``
109+ option is set:
110+
111+ >>> data = hsd.load_string(hsdstr, lower_tag_names=True, include_hsd_attribs=True)
112+
113+ Each tag in the dictionary will have a corresponding ".hsdattrib" entry
114+ with the recorded data:
115+
116+ >>> data["dftb.hsdattrib"]
117+ {'line': 1, 'tag': 'Dftb'}
118+
119+ This additional data can be then also used to format the tags in the
120+ original style, when writing the data in HSD-format again. Compare:
121+
122+ >>> hsd.dump_string(data)
123+ 'dftb {\\ n scc = Yes\\ n filling {\\ n fermi {\\ n temperature [Kelvin] = 100\\ n }\\ n }\\ n}\\ n'
124+
125+ versus
126+
127+ >>> hsd.dump_string(data, use_hsd_attribs=True)
128+ 'Dftb {\\ n Scc = Yes\\ n Filling {\\ n Fermi {\\ n Temperature [Kelvin] = 100\\ n }\\ n }\\ n}\\ n'
129+
73130 """
74131 fobj = io .StringIO (hsdstr )
75- return load (fobj )
132+ return load (fobj , lower_tag_names , include_hsd_attribs , flatten_data )
76133
77134
78- def dump (data : dict , hsdfile : Union [TextIO , str ]):
135+ def dump (data : dict , hsdfile : Union [TextIO , str ],
136+ use_hsd_attribs : bool = False ):
79137 """Dumps data to a file in HSD format.
80138
81139 Args:
82140 data: Dictionary like object to be written in HSD format
83141 hsdfile: Name of file or file like object to write the result to.
142+ use_hsd_attribs: Whether HSD attributes in the data structure should
143+ be used to format the output.
144+
145+ This option can be used, for example, to restore original tag
146+ names, if the file was loaded with the ``lower_tag_names`` and
147+ ``include_hsd_attribs`` options set.
84148
85149 Raises:
86150 TypeError: if object is not a dictionary instance.
151+
152+ Examples:
153+
154+ See :func:`hsd.load_string` for an example.
87155 """
88156 if not isinstance (data , dict ):
89157 msg = "Invalid object type"
90158 raise TypeError (msg )
91159 if isinstance (hsdfile , str ):
92160 with open (hsdfile , "w" ) as hsdfile :
93- _dump_dict (data , hsdfile , "" )
161+ _dump_dict (data , hsdfile , "" , use_hsd_attribs = use_hsd_attribs )
94162 else :
95- _dump_dict (data , hsdfile , "" )
163+ _dump_dict (data , hsdfile , "" , use_hsd_attribs = use_hsd_attribs )
96164
97165
98- def dump_string (data ) -> str :
166+ def dump_string (data : dict , use_hsd_attribs : bool = False ) -> str :
99167 """Serializes an object to string in HSD format.
100168
101169 Args:
102170 data: Dictionary like object to be written in HSD format.
171+ use_hsd_attribs: Whether HSD attributes of the data structure should
172+ be used to format the output (e.g. to restore original mixed case
173+ tag names)
103174
104175 Returns:
105176 HSD formatted string.
@@ -119,13 +190,15 @@ def dump_string(data) -> str:
119190 >>> hsd.dump_string(hsdtree)
120191 'Dftb {\\ n Scc = Yes\\ n Filling {\\ n Fermi {\\ n Temperature [Kelvin] = 100\\ n }\\ n }\\ n}\\ n'
121192
193+ See also :func:`hsd.load_string` for an example.
194+
122195 """
123196 result = io .StringIO ()
124- dump (data , result )
197+ dump (data , result , use_hsd_attribs = use_hsd_attribs )
125198 return result .getvalue ()
126199
127200
128- def _dump_dict (obj , fobj , indentstr ):
201+ def _dump_dict (obj , fobj , indentstr , use_hsd_attribs ):
129202 for key , value in obj .items ():
130203 if key .endswith (ATTRIB_SUFFIX ):
131204 if key [:- LEN_ATTRIB_SUFFIX ] in obj :
@@ -149,17 +222,23 @@ def _dump_dict(obj, fobj, indentstr):
149222 raise ValueError (msg )
150223 else :
151224 attribstr = " [" + attrib + "]"
225+ if use_hsd_attribs :
226+ hsdattribs = obj .get (key + HSD_ATTRIB_SUFFIX )
227+ if hsdattribs is not None :
228+ key = hsdattribs .get ("tag" , key )
152229 if isinstance (value , dict ):
153230 if value :
154231 fobj .write ("{}{}{} {{\n " .format (indentstr , key , attribstr ))
155- _dump_dict (value , fobj , indentstr + _INDENT_STR )
232+ _dump_dict (
233+ value , fobj , indentstr + _INDENT_STR , use_hsd_attribs )
156234 fobj .write ("{}}}\n " .format (indentstr ))
157235 else :
158236 fobj .write ("{}{}{} {{}}\n " .format (indentstr , key , attribstr ))
159237 elif isinstance (value , list ) and value and isinstance (value [0 ], dict ):
160238 for item in value :
161239 fobj .write ("{}{}{} {{\n " .format (indentstr , key , attribstr ))
162- _dump_dict (item , fobj , indentstr + _INDENT_STR )
240+ _dump_dict (
241+ item , fobj , indentstr + _INDENT_STR , use_hsd_attribs )
163242 fobj .write ("{}}}\n " .format (indentstr ))
164243 else :
165244 valstr = _get_hsd_rhs (value , indentstr )
0 commit comments