|
| 1 | +#------------------------------------------------------------------------------# |
| 2 | +# hsd: package for manipulating HSD-formatted data # |
| 3 | +# Copyright (C) 2011 - 2020 DFTB+ developers group # |
| 4 | +# # |
| 5 | +# See the LICENSE file for terms of usage and distribution. # |
| 6 | +#------------------------------------------------------------------------------# |
| 7 | +# |
| 8 | +""" |
| 9 | +Contains an event-driven builder for dictionary based (JSON-like) structure |
| 10 | +""" |
| 11 | +import re |
| 12 | +from .parser import HsdEventHandler |
| 13 | + |
| 14 | +__all__ = ['HsdDictBuilder'] |
| 15 | + |
| 16 | + |
# Tokenizer for a single line of HSD text data.
#
# The alternatives are tried in this order; exactly one named group is set
# per match:
#   int      optionally signed integer delimited by whitespace / line ends
#   float    optionally signed decimal number with optional exponent
#   logical  case-insensitive "yes" / "no"
#   qstr     text enclosed in matching single or double quotes (the quotes
#            are part of the captured value)
#   str      any other run of non-whitespace characters
#
# Every alternative skips leading whitespace itself. The float, logical and
# quoted-string alternatives do not consume the whitespace that follows them,
# so the string fallback must skip it too; otherwise a plain string following
# such a token would be captured together with its separator (e.g. " abc").
_TOKEN_PATTERN = re.compile(r"""
(?:\s*(?:^|(?<=\s))(?P<int>[+-]?[0-9]+)(?:\s*$|\s+))
|
(?:\s*(?:^|(?<=\s))
(?P<float>[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)(?:$|(?=\s+)))
|
(?:\s*(?:^|(?<=\s))(?P<logical>[Yy][Ee][Ss]|[Nn][Oo])(?:$|(?=\s+)))
|
(?:\s*(?:(?P<qstr>(?P<quote>['"]).*?(?P=quote)) | (?P<str>\S+)))
""", re.VERBOSE | re.MULTILINE)
| 27 | + |
| 28 | + |
class HsdDictBuilder(HsdEventHandler):
    """Deserializes HSD into nested dictionaries.

    The builder reacts to the parser events ``open_tag``, ``add_text`` and
    ``close_tag`` and assembles a JSON-like structure:

    * A tag becomes a key in the dictionary of its enclosing block.
    * The value of a tag is the converted text passed to ``add_text`` if
      there was any, otherwise the dictionary of its child tags.
    * Attributes (options) of a tag are stored in the enclosing block
      under the key ``"<tagname>.<attrname>"``.
    * If a tag occurs several times within the same block, its values are
      collected in a list in order of appearance.

    Args:
        flatten_data: If True, the tokens of multi-line text data are
            collected in one flat list instead of one list per line.
    """

    def __init__(self, flatten_data=False):
        super().__init__()
        self._hsddict = {}
        self._curblock = self._hsddict
        self._parentblocks = []
        self._data = None
        self._flatten_data = flatten_data
        # Names of the tags which occurred more than once in the current
        # block, and the corresponding sets of the enclosing blocks. Needed
        # to tell a list of repeated tag values from list-valued text data.
        self._repeated = set()
        self._parentrepeated = []


    def open_tag(self, tagname, options, hsdoptions):
        """Starts a new block for the tag and stores the tag attributes.

        Args:
            tagname: Name of the opened tag.
            options: Mapping of attribute names to attribute values (may be
                empty or None).
            hsdoptions: HSD-specific options of the tag (not used here).
        """
        for attrname, attrvalue in (options or {}).items():
            self._curblock[tagname + '.' + attrname] = attrvalue
        self._parentblocks.append(self._curblock)
        self._parentrepeated.append(self._repeated)
        self._curblock = {}
        self._repeated = set()


    def close_tag(self, tagname):
        """Finishes the current block and stores it in its parent block.

        Args:
            tagname: Name of the tag being closed.
        """
        parentblock = self._parentblocks.pop()
        repeated = self._parentrepeated.pop()

        # Text data takes precedence over any collected child tags
        if self._data is None:
            content = self._curblock
        else:
            content = self._data
            self._data = None

        if tagname not in parentblock:
            parentblock[tagname] = content
        elif tagname in repeated:
            # Third or later occurrence: value list exists already
            parentblock[tagname].append(content)
        else:
            # Second occurrence: wrap the first value, even if it is a list
            # itself (list-valued data), so that occurrences stay separate.
            parentblock[tagname] = [parentblock[tagname], content]
            repeated.add(tagname)

        self._curblock = parentblock
        self._repeated = repeated


    def add_text(self, text):
        """Converts the text and keeps it as data of the currently open tag.

        Args:
            text: Raw text content of the tag.
        """
        self._data = self._text_to_data(text)


    @property
    def hsddict(self):
        """Returns the dictionary which has been built"""
        return self._hsddict


    def _text_to_data(self, txt):
        """Converts raw text into Python values.

        Each line is split into tokens, which are converted to int, float,
        bool ("yes"/"no") or str. Quoted strings keep their quotes.

        Args:
            txt: Text to convert (may contain several lines).

        Returns:
            The single value, if the text consists of one token only; a
            list of values, if it consists of one line only (or when data
            flattening is enabled); otherwise a list with one list of
            values per line.
        """
        data = []
        for line in txt.split("\n"):
            # With flattening all tokens go directly into the result list
            linedata = data if self._flatten_data else []
            for match in _TOKEN_PATTERN.finditer(line.strip()):
                linedata.append(self._match_to_value(match))
            if not self._flatten_data:
                data.append(linedata)
        if len(data) == 1:
            if isinstance(data[0], list) and len(data[0]) == 1:
                return data[0][0]
            return data[0]
        return data


    @staticmethod
    def _match_to_value(match):
        """Returns the Python value represented by a token match."""
        intstr = match.group("int")
        if intstr is not None:
            return int(intstr)
        floatstr = match.group("float")
        if floatstr is not None:
            return float(floatstr)
        logical = match.group("logical")
        if logical is not None:
            return logical.lower() == "yes"
        plainstr = match.group("str")
        if plainstr is not None:
            return plainstr
        return match.group("qstr")
0 commit comments