Make handling of repeated nodes consistent with DFTB+

aradi · aradi · commit 2110e6345ba3 · 2021-11-03T13:43:44.000+01:00
diff --git a/docs/hsd.rst b/docs/hsd.rst
@@ -162,6 +162,10 @@ nodes will be mapped to one key, which will contain a list of dictionaries
                   ]
               },
           ]
+          # Also the attributes become a list. Due to technical reasons the
+          # dictbuilder always creates an attribute list for multiple nodes,
+          # even if none of the nodes carries an actual attribute.
+          "PointCharges.attrib": [None, None]
       }
   }
 
@@ -186,7 +190,7 @@ to record following additional data for each HSD node:
 
 If this information is being recorded, a special key with the
 ``.hsdattrib`` suffix will be generated for each node in the dictionary/JSON
-presentation. The correpsonding value will be a dictionary with those
+presentation. The corresponding value will be a dictionary with those
 information.
 
 As an example, let's store the input from the previous section ::
@@ -199,7 +203,7 @@ As an example, let's store the input from the previous section ::
   }
 
 in the file `test.hsd`, parse it and convert the node names to lower case
-(to make the input processing case-insensitive). Using the Python command ::
+(to enable case-insensitive input processing). Using the Python command ::
 
   inpdict = hsd.load("test.hsd", lower_tag_names=True, include_hsd_attribs=True)
 
@@ -208,13 +212,13 @@ will yield the following dictionary representation of the input::
   {
       'hamiltonian.hsdattrib': {'equal': True, 'line': 0, 'tag': 'Hamiltonian'},
       'hamiltonian': {
-          'dftb.hsdattrib': {'line': 0, 'tag': 'Dftb'},
+          'dftb.hsdattrib': {'line': 0, 'equal': False, 'tag': 'Dftb'},
           'dftb': {
               'scc.hsdattrib': {'equal': True, 'line': 1, 'tag': 'Scc'},
               'scc': True,
               'filling.hsdattrib': {'equal': True, 'line': 2, 'tag': 'Filling'},
               'filling': {
-                  'fermi.hsdattrib': {'line': 2, 'tag': 'Fermi'},
+                  'fermi.hsdattrib': {'line': 2, 'equal': False, 'tag': 'Fermi'},
                   'fermi': {
                       'temperature.attrib': 'Kelvin',
                       'temperature.hsdattrib': {'equal': True, 'line': 3,
diff --git a/src/hsd/dict.py b/src/hsd/dict.py
@@ -80,27 +80,38 @@ def close_tag(self, tagname):
         attrib, hsdattrib = self._attribs.pop(-1)
         parentblock = self._parentblocks.pop(-1)
         prevcont = parentblock.get(tagname)
-        if prevcont is not None:
-            if isinstance(prevcont, dict) and self._data is None:
-                prevcont = [prevcont]
-                parentblock[tagname] = prevcont
-            elif not (isinstance(prevcont, list)
-                      and isinstance(prevcont[0], dict)):
-                msg = f"Invalid duplicate occurance of node '{tagname}'"
-                raise HsdError(msg)
+        if self._data is not None:
+            if prevcont is None:
+                parentblock[tagname] = self._data
+            elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict):
+                prevcont.append({None: self._data})
+            elif isinstance(prevcont, dict):
+                parentblock[tagname] = [prevcont, {None: self._data}]
+            else:
+                parentblock[tagname] = [{None: prevcont}, {None: self._data}]
+        else:
+            if prevcont is None:
+                parentblock[tagname] = self._curblock
+            elif isinstance(prevcont, list) and len(prevcont) > 0 and isinstance(prevcont[0], dict):
+                prevcont.append(self._curblock)
+            elif isinstance(prevcont, dict):
+                parentblock[tagname] = [prevcont, self._curblock]
+            else:
+                parentblock[tagname] = [{None: prevcont}, self._curblock]
 
         if prevcont is None:
-            content = self._data if self._data is not None else self._curblock
-            parentblock[tagname] = content
             if attrib:
                 parentblock[tagname + ATTRIB_SUFFIX] = attrib
             if self._include_hsd_attribs:
                 parentblock[tagname + HSD_ATTRIB_SUFFIX] = hsdattrib
         else:
-            prevcont.append(self._curblock)
             prevattrib = parentblock.get(tagname + ATTRIB_SUFFIX)
-            if not (prevattrib is None and attrib is None):
-                msg = f"Duplicate node '{tagname}' should not carry attributes"
+            if isinstance(prevattrib, list):
+                prevattrib.append(attrib)
+            else:
+                parentblock[tagname + ATTRIB_SUFFIX] = [prevattrib, attrib]
+                print(f"parentblock[{tagname} + {ATTRIB_SUFFIX}] = [{prevattrib}, {attrib}]")
+
             if self._include_hsd_attribs:
                 prevhsdattrib = parentblock.get(tagname + HSD_ATTRIB_SUFFIX)
                 if isinstance(prevhsdattrib, list):
@@ -189,8 +200,12 @@ def walk(self, dictobj):
             elif isinstance(value, list) and value and isinstance(value[0], dict):
                 for ind, item in enumerate(value):
                     hsdattr = hsdattrib[ind] if hsdattrib else None
-                    self._eventhandler.open_tag(key, None, hsdattr)
-                    self.walk(item)
+                    attr = attrib[ind] if attrib else None
+                    self._eventhandler.open_tag(key, attr, hsdattr)
+                    if None in item:
+                        self._eventhandler.add_text(_to_text(item[None]))
+                    else:
+                        self.walk(item)
                     self._eventhandler.close_tag(key)
 
             else:
diff --git a/src/hsd/formatter.py b/src/hsd/formatter.py
@@ -97,7 +97,7 @@ def add_text(self, text: str):
         equal = self._followed_by_equal[-1]
         multiline = "\n" in text
         if equal is None and not multiline:
-            if len(self._followed_by_equal) > 2:
+            if len(self._followed_by_equal) > 1:
                 equal = not self._followed_by_equal[-2]
             else:
                 equal = True
diff --git a/src/hsd/io.py b/src/hsd/io.py
@@ -114,7 +114,7 @@ def load_string(
         with the recorded data:
 
         >>> data["dftb.hsdattrib"]
-        {'line': 1, 'name': 'Dftb'}
+        {'equal': False, 'line': 1, 'name': 'Dftb'}
 
         This additional data can be then also used to format the tags in the
         original style, when writing the data in HSD-format again. Compare:
diff --git a/src/hsd/parser.py b/src/hsd/parser.py
@@ -157,6 +157,7 @@ def _parse(self, line):
             # Opening tag by curly brace
             elif sign == "{":
                 #self._has_child = True
+                self._hsdattrib[common.HSD_ATTRIB_EQUAL] = False
                 self._starttag(before, self._after_equal_sign)
                 self._buffer = []
                 self._after_equal_sign = False
diff --git a/test/test_dict.py b/test/test_dict.py
@@ -0,0 +1,200 @@
+#!/bin/env python3
+#------------------------------------------------------------------------------#
+#  hsd-python: package for manipulating HSD-formatted data in Python           #
+#  Copyright (C) 2011 - 2021  DFTB+ developers group                           #
+#  Licensed under the BSD 2-clause license.                                    #
+#------------------------------------------------------------------------------#
+#
+"""Tests for the dictbuilder class"""
+
+import io
+import pytest
+import hsd
+
+_HSD_LINE = hsd.HSD_ATTRIB_LINE
+_HSD_EQUAL = hsd.HSD_ATTRIB_EQUAL
+_HSD_NAME = hsd.HSD_ATTRIB_NAME
+
+# General test list format for valid tests
+# [("Test name", ([List of HSD events], expected dictionary outcome))]
+
+# Tests without hsd attribute recording
+_TESTS_NO_HSDATTRIB = [
+    (
+        "Simple", (
+            "Test {}",
+            {"Test": {}},
+        )
+    ),
+    (
+        "Data with quoted strings", (
+            "O = SelectedShells { \"s\" \"p\" }",
+            {"O": {"SelectedShells": ['"s"', '"p"']}},
+        )
+    ),
+    (
+        "Attribute containing comma", (
+            "PolarRadiusCharge [AA^3,AA,] = {\n1.030000  3.800000  2.820000\n}",
+            {"PolarRadiusCharge": [1.03, 3.8, 2.82], "PolarRadiusCharge.attrib": "AA^3,AA,"},
+        )
+    ),
+    (
+        "Duplicate node entry", (
+            "a { b = 1 }\na { b = 2 }\n",
+            {"a.attrib": [None, None], "a": [{"b": 1}, {"b": 2}]},
+        )
+    ),
+    (
+        "Duplicate value entry", (
+            "a = 1\na = 2",
+            {"a.attrib": [None, None], "a": [{None: 1}, {None: 2}]},
+        )
+    ),
+]
+_TESTS_NO_HSDATTRIB_NAMES, _TESTS_NO_HSDATTRIB_CASES = zip(*_TESTS_NO_HSDATTRIB)
+
+
+# Tests with HSD attribute recording
+# The input string should be formatted the same way as it comes out from the formatter since
+# these tests are also used to test backwards direction (dictionary -> string).
+_TESTS_HSDATTRIB = [
+    (
+        "Simple", (
+            "Test {}\n",
+            {"Test.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False}, "Test": {}}
+        )
+    ),
+    (
+        "Data with quoted strings", (
+            "O = SelectedShells {\n  \"s\" \"p\"\n}\n",
+            {
+                "O.hsdattrib": {_HSD_EQUAL: True, _HSD_LINE: 0},
+                "O": {
+                    "SelectedShells.hsdattrib": {_HSD_LINE: 0, _HSD_EQUAL: False},
+                    "SelectedShells": ['"s"', '"p"']
+                    }
+            }
+        )
+    ),
+    (
+        "Duplicate node", (
+            "a {\n  b = 1\n}\na {\n  b = 2\n}\n",
+            {
+                "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: False},
+                                {_HSD_LINE: 3, _HSD_EQUAL: False}],
+                "a.attrib": [None, None],
+                "a": [
+                    {"b.hsdattrib": {_HSD_LINE: 1, _HSD_EQUAL: True}, "b": 1},
+                    {"b.hsdattrib": {_HSD_LINE: 4, _HSD_EQUAL: True}, "b": 2}
+                ]
+            },
+        )
+    ),
+    (
+        "Duplicate value", (
+            "a = 1\na = 2\n",
+            {
+                "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True}],
+                "a.attrib": [None, None],
+                "a": [{None: 1}, {None: 2}]
+            },
+        )
+    ),
+    (
+        "Triple value with attrib", (
+            "a = 1\na = 2\na [someunit] {\n  3\n}\n",
+            {
+                "a.hsdattrib": [{_HSD_LINE: 0, _HSD_EQUAL: True}, {_HSD_LINE: 1, _HSD_EQUAL: True},
+                                {_HSD_LINE: 2, _HSD_EQUAL: False}],
+                "a.attrib": [None, None, "someunit"],
+                "a": [{None: 1}, {None: 2}, {None: 3}]
+            },
+        )
+    ),
+
+]
+_TESTS_HSDATTRIB_NAMES, _TESTS_HSDATTRIB_CASES = zip(*_TESTS_HSDATTRIB)
+
+
+# Tests with HSD attribute recording and tag name lowering switched on
+# The input string should be formatted the same way as it comes out from the formatter since
+# these tests are also used to test backwards direction (dictionary -> string).
+_TESTS_HSDATTRIB_LOWER = [
+    (
+        "Simple", (
+            "Test {}\n",
+            {"test.hsdattrib": {_HSD_NAME: "Test", _HSD_LINE: 0, _HSD_EQUAL: False}, "test": {}}
+        )
+    ),
+]
+_TESTS_HSDATTRIB_LOWER_NAMES, _TESTS_HSDATTRIB_LOWER_CASES = zip(*_TESTS_HSDATTRIB_LOWER)
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_NO_HSDATTRIB_CASES,
+    ids=_TESTS_NO_HSDATTRIB_NAMES
+)
+def test_builder_nohsdattr(hsdstr, hsddict):
+    """Test transformation from hsd to dictionary without HSD attributes."""
+    dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=False)
+    parser = hsd.HsdParser(eventhandler=dictbuilder)
+    fobj = io.StringIO(hsdstr)
+    parser.parse(fobj)
+    assert dictbuilder.hsddict == hsddict
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_CASES,
+    ids=_TESTS_HSDATTRIB_NAMES
+)
+def test_builder_hsdattr(hsdstr, hsddict):
+    """Test transformation from hsd to dictionary with HSD attributes."""
+    dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True)
+    parser = hsd.HsdParser(eventhandler=dictbuilder)
+    fobj = io.StringIO(hsdstr)
+    parser.parse(fobj)
+    assert dictbuilder.hsddict == hsddict
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_LOWER_CASES,
+    ids=_TESTS_HSDATTRIB_LOWER_NAMES
+)
+def test_builder_hsdattr_lower(hsdstr, hsddict):
+    """Test transformation from hsd to dictionary with HSD attributes and case lowering."""
+    dictbuilder = hsd.HsdDictBuilder(include_hsd_attribs=True)
+    parser = hsd.HsdParser(eventhandler=dictbuilder, lower_tag_names=True)
+    fobj = io.StringIO(hsdstr)
+    parser.parse(fobj)
+    assert dictbuilder.hsddict == hsddict
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_CASES,
+    ids=_TESTS_HSDATTRIB_NAMES
+)
+def test_walker_hsdattr(hsdstr, hsddict):
+    """Test transformation from dictionary to string using HSD attributes."""
+    output = io.StringIO()
+    formatter = hsd.HsdFormatter(output, use_hsd_attribs=True)
+    dictwalker = hsd.HsdDictWalker(formatter)
+    dictwalker.walk(hsddict)
+    assert output.getvalue() == hsdstr
+
+
+@pytest.mark.parametrize(
+    "hsdstr,hsddict",
+    _TESTS_HSDATTRIB_LOWER_CASES,
+    ids=_TESTS_HSDATTRIB_LOWER_NAMES
+)
+def test_walker_hsdattr_lower(hsdstr, hsddict):
+    """Test transformation from dictionary to string using HSD attributes."""
+    output = io.StringIO()
+    formatter = hsd.HsdFormatter(output, use_hsd_attribs=True)
+    dictwalker = hsd.HsdDictWalker(formatter)
+    dictwalker.walk(hsddict)
+    assert output.getvalue() == hsdstr
diff --git a/test/test_dictbuilder.py b/test/test_dictbuilder.py
diff --git a/test/test_parser.py b/test/test_parser.py