Skip to content

Commit b6c99b9

Browse files
authored
Add remaining symmetry group fields (Hall, HM symbols, IT number) (#2240)
* Add remaining symmetry group fields * Add remaining symmetry group fields * Add a rough regexp for HM symbols and testing
1 parent 56450cb commit b6c99b9

8 files changed

Lines changed: 268 additions & 4 deletions

File tree

openapi/openapi.json

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4371,6 +4371,68 @@
43714371
"x-optimade-queryable": "optional",
43724372
"x-optimade-support": "optional"
43734373
},
4374+
"space_group_symbol_hall": {
4375+
"anyOf": [
4376+
{
4377+
"type": "string"
4378+
},
4379+
{
4380+
"type": "null"
4381+
}
4382+
],
4383+
"title": "Space Group Symbol Hall",
4384+
"description": "A Hall space group symbol representing the symmetry of the structure as defined in (Hall, 1981, 1981a).\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The change-of-basis operations are used as defined in the International Tables of Crystallography (ITC) Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001).\n - Each component of the Hall symbol MUST be separated by a single space symbol.\n - If there exists a standard Hall symbol which represents the symmetry it SHOULD be used.\n - MUST be `null` if `nperiodic_dimensions` is not equal to 3.\n\n- **Examples**:\n - Space group symbols with explicit origin (the Hall symbols):\n - `P 2c -2ac`\n - `I 4bd 2ab 3`\n - Space group symbols with change-of-basis operations:\n - `P 2yb (-1/2*x+z,1/2*x,y)`\n - `-I 4 2 (1/2*x+1/2*y,-1/2*x+1/2*y,z)`\n\n- **Bibliographic References**:\n - Hall, S. R. (1981) Space-group notation with an explicit origin. Acta Crystallographica Section A, 37, 517-525, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001228\n - Hall, S. R. (1981a) Space-group notation with an explicit origin; erratum. Acta Crystallographica Section A, 37, 921-921, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001976\n - IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.",
4385+
"x-optimade-queryable": "optional",
4386+
"x-optimade-support": "optional"
4387+
},
4388+
"space_group_symbol_hermann_mauguin": {
4389+
"anyOf": [
4390+
{
4391+
"type": "string",
4392+
"pattern": "^(P|I|F|A|B|C|R)(\\s+\\d+|\\s+[a-z]+|\\s+\\d+/[a-z]+|\\s+\\d+/\\d+|\\s+-\\d*|\\s+\\d+/m|\\s+[a-z]+/m)*$"
4393+
},
4394+
{
4395+
"type": "null"
4396+
}
4397+
],
4398+
"pattern": "^(P|I|F|A|B|C|R)(\\s+\\d+|\\s+[a-z]+|\\s+\\d+/[a-z]+|\\s+\\d+/\\d+|\\s+-\\d*|\\s+\\d+/m|\\s+[a-z]+/m)*$",
4399+
"title": "Space Group Symbol Hermann Mauguin",
4400+
"description": "A human- and machine-readable string containing the short Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The H-M symbol SHOULD aim to convey the closest representation of the symmetry information that can be specified using the short format used in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1 as described in the accompanying text.\n - The symbol MAY be a non-standard short H-M symbol.\n - The H-M symbol does not unambiguously communicate the axis, cell, and origin choice, and the given symbol SHOULD NOT be amended to convey this information.\n - To encode as character strings, the following adaptations MUST be made when representing H-M symbols given in their typesetted form:\n - the overbar above the numbers MUST be changed to the minus sign in front of the digit (e.g. '-2');\n - subscripts that denote screw axes are written as digits immediately after the axis designator without a space (e.g. 'P 32')\n - the space group generators MUST be separated by a single space (e.g. 'P 21 21 2');\n - there MUST be no spaces in the space group generator designation (i.e. use 'P 21/m', not the 'P 21 / m');\n\n- **Examples**:\n - `C 2`\n - `P 21 21 21`\n\n- **Bibliographic References**:\n - IUCr (2005). International Tables for Crystallography vol. A. Space-Group Symmetry. Ed. Theo Hahn. 5-th edition. Dordrecht, Springer.\n",
4401+
"x-optimade-queryable": "optional",
4402+
"x-optimade-support": "optional"
4403+
},
4404+
"space_group_symbol_hermann_mauguin_extended": {
4405+
"anyOf": [
4406+
{
4407+
"type": "string",
4408+
"pattern": "^(P|I|F|A|B|C|R)(\\s+\\d+|\\s+[a-z]+|\\s+\\d+/[a-z]+|\\s+\\d+/\\d+|\\s+-\\d*|\\s+\\d+/m|\\s+[a-z]+/m)*$"
4409+
},
4410+
{
4411+
"type": "null"
4412+
}
4413+
],
4414+
"pattern": "^(P|I|F|A|B|C|R)(\\s+\\d+|\\s+[a-z]+|\\s+\\d+/[a-z]+|\\s+\\d+/\\d+|\\s+-\\d*|\\s+\\d+/m|\\s+[a-z]+/m)*$",
4415+
"title": "Space Group Symbol Hermann Mauguin Extended",
4416+
"description": "A human- and machine-readable string containing the extended Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The H-M symbols SHOULD be given as specified in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1.\n - The change-of-basis operation SHOULD be provided for the non-standard axis and cell choices.\n - The extended H-M symbol does not unambiguously communicate the origin choice, and the given symbol SHOULD NOT be amended to convey this information.\n - The description of the change-of-basis SHOULD follow conventions of the ITC Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001).\n - The same character string encoding conventions MUST be used as for the specification of the `space_group_symbol_hermann_mauguin` property.\n\n- **Examples**:\n - `C 1 2 1`\n\n- **Bibliographic References**:\n - IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.\n - IUCr (2005). International Tables for Crystallography vol. A. Space-Group Symmetry. Ed. Theo Hahn. 5-th edition. Dordrecht, Springer.\n\n",
4417+
"x-optimade-queryable": "optional",
4418+
"x-optimade-support": "optional"
4419+
},
4420+
"space_group_it_number": {
4421+
"anyOf": [
4422+
{
4423+
"type": "integer",
4424+
"maximum": 230.0,
4425+
"minimum": 1.0
4426+
},
4427+
{
4428+
"type": "null"
4429+
}
4430+
],
4431+
"title": "Space Group It Number",
4432+
"description": "Space group number which specifies the space group of the structure as defined in the International Tables for Crystallography Vol. A. (IUCr, 2005).\n\n- **Type**: integer\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n - The integer value MUST be between 1 and 230.\n - MUST be null if `nperiodic_dimensions` is not equal to 3.",
4433+
"x-optimade-queryable": "optional",
4434+
"x-optimade-support": "optional"
4435+
},
43744436
"cartesian_site_positions": {
43754437
"anyOf": [
43764438
{

optimade/models/structures.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
ANONYMOUS_ELEMENTS,
1212
CHEMICAL_FORMULA_REGEXP,
1313
CHEMICAL_SYMBOLS,
14+
HM_SYMBOL_REGEXP,
1415
OptimadeField,
1516
StrictField,
1617
SupportLevel,
@@ -601,6 +602,119 @@ class StructureResourceAttributes(EntryResourceAttributes):
601602
),
602603
] = None
603604

605+
space_group_symbol_hall: Annotated[
606+
str | None,
607+
OptimadeField(
608+
description="""A Hall space group symbol representing the symmetry of the structure as defined in (Hall, 1981, 1981a).
609+
610+
- **Type**: string
611+
612+
- **Requirements/Conventions**:
613+
- **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.
614+
- **Query**: Support for queries on this property is OPTIONAL.
615+
- The change-of-basis operations are used as defined in the International Tables for Crystallography (ITC) Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001).
616+
- Each component of the Hall symbol MUST be separated by a single space symbol.
617+
- If there exists a standard Hall symbol which represents the symmetry it SHOULD be used.
618+
- MUST be `null` if `nperiodic_dimensions` is not equal to 3.
619+
620+
- **Examples**:
621+
- Space group symbols with explicit origin (the Hall symbols):
622+
- `P 2c -2ac`
623+
- `I 4bd 2ab 3`
624+
- Space group symbols with change-of-basis operations:
625+
- `P 2yb (-1/2*x+z,1/2*x,y)`
626+
- `-I 4 2 (1/2*x+1/2*y,-1/2*x+1/2*y,z)`
627+
628+
- **Bibliographic References**:
629+
- Hall, S. R. (1981) Space-group notation with an explicit origin. Acta Crystallographica Section A, 37, 517-525, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001228
630+
- Hall, S. R. (1981a) Space-group notation with an explicit origin; erratum. Acta Crystallographica Section A, 37, 921-921, International Union of Crystallography (IUCr), DOI: https://doi.org/10.1107/s0567739481001976
631+
- IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.""",
632+
support=SupportLevel.OPTIONAL,
633+
queryable=SupportLevel.OPTIONAL,
634+
),
635+
] = None
636+
637+
space_group_symbol_hermann_mauguin: Annotated[
638+
str | None,
639+
OptimadeField(
640+
description="""A human- and machine-readable string containing the short Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response.
641+
642+
- **Type**: string
643+
644+
- **Requirements/Conventions**:
645+
- **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.
646+
- **Query**: Support for queries on this property is OPTIONAL.
647+
- The H-M symbol SHOULD aim to convey the closest representation of the symmetry information that can be specified using the short format used in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1 as described in the accompanying text.
648+
- The symbol MAY be a non-standard short H-M symbol.
649+
- The H-M symbol does not unambiguously communicate the axis, cell, and origin choice, and the given symbol SHOULD NOT be amended to convey this information.
650+
- To encode as character strings, the following adaptations MUST be made when representing H-M symbols given in their typeset form:
651+
- the overbar above the numbers MUST be changed to the minus sign in front of the digit (e.g. '-2');
652+
- subscripts that denote screw axes are written as digits immediately after the axis designator without a space (e.g. 'P 32')
653+
- the space group generators MUST be separated by a single space (e.g. 'P 21 21 2');
654+
- there MUST be no spaces in the space group generator designation (i.e. use 'P 21/m', not 'P 21 / m');
655+
656+
- **Examples**:
657+
- `C 2`
658+
- `P 21 21 21`
659+
660+
- **Bibliographic References**:
661+
- IUCr (2005). International Tables for Crystallography vol. A. Space-Group Symmetry. Ed. Theo Hahn. 5-th edition. Dordrecht, Springer.
662+
""",
663+
support=SupportLevel.OPTIONAL,
664+
queryable=SupportLevel.OPTIONAL,
665+
pattern=HM_SYMBOL_REGEXP,
666+
),
667+
] = None
668+
669+
space_group_symbol_hermann_mauguin_extended: Annotated[
670+
str | None,
671+
OptimadeField(
672+
description="""A human- and machine-readable string containing the extended Hermann-Mauguin (H-M) symbol which specifies the space group of the structure in the response.
673+
674+
- **Type**: string
675+
676+
- **Requirements/Conventions**:
677+
- **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.
678+
- **Query**: Support for queries on this property is OPTIONAL.
679+
- The H-M symbols SHOULD be given as specified in the International Tables for Crystallography vol. A (IUCr, 2005), Table 4.3.2.1.
680+
- The change-of-basis operation SHOULD be provided for the non-standard axis and cell choices.
681+
- The extended H-M symbol does not unambiguously communicate the origin choice, and the given symbol SHOULD NOT be amended to convey this information.
682+
- The description of the change-of-basis SHOULD follow conventions of the ITC Vol. B, Sect. 1.4, Appendix A1.4.2 (IUCr, 2001).
683+
- The same character string encoding conventions MUST be used as for the specification of the `space_group_symbol_hermann_mauguin` property.
684+
685+
- **Examples**:
686+
- `C 1 2 1`
687+
688+
- **Bibliographic References**:
689+
- IUCr (2001). International Tables for Crystallography vol. B. Reciprocal Space. Ed. U. Shmueli. 2-nd edition. Dordrecht/Boston/London, Kluwer Academic Publishers.
690+
- IUCr (2005). International Tables for Crystallography vol. A. Space-Group Symmetry. Ed. Theo Hahn. 5-th edition. Dordrecht, Springer.
691+
692+
""",
693+
support=SupportLevel.OPTIONAL,
694+
queryable=SupportLevel.OPTIONAL,
695+
pattern=HM_SYMBOL_REGEXP,
696+
),
697+
] = None
698+
699+
space_group_it_number: Annotated[
700+
int | None,
701+
OptimadeField(
702+
description="""Space group number which specifies the space group of the structure as defined in the International Tables for Crystallography Vol. A. (IUCr, 2005).
703+
704+
- **Type**: integer
705+
706+
- **Requirements/Conventions**:
707+
- **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.
708+
- **Query**: Support for queries on this property is OPTIONAL.
709+
- The integer value MUST be between 1 and 230.
710+
- MUST be null if `nperiodic_dimensions` is not equal to 3.""",
711+
support=SupportLevel.OPTIONAL,
712+
queryable=SupportLevel.OPTIONAL,
713+
ge=1,
714+
le=230,
715+
),
716+
] = None
717+
604718
cartesian_site_positions: Annotated[
605719
list[Vector3D] | None,
606720
OptimadeField(

optimade/models/utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,18 @@ def reduce_formula(formula: str) -> str:
234234

235235
CHEMICAL_FORMULA_REGEXP = r"(^$)|^([A-Z][a-z]?([2-9]|[1-9]\d+)?)+$"
236236
SYMMETRY_OPERATION_REGEXP = r"^([-+]?[xyz]([-+][xyz])?([-+](1/2|[12]/3|[1-3]/4|[1-5]/6))?|[-+]?(1/2|[12]/3|[1-3]/4|[1-5]/6)([-+][xyz]([-+][xyz])?)?),([-+]?[xyz]([-+][xyz])?([-+](1/2|[12]/3|[1-3]/4|[1-5]/6))?|[-+]?(1/2|[12]/3|[1-3]/4|[1-5]/6)([-+][xyz]([-+][xyz])?)?),([-+]?[xyz]([-+][xyz])?([-+](1/2|[12]/3|[1-3]/4|[1-5]/6))?|[-+]?(1/2|[12]/3|[1-3]/4|[1-5]/6)([-+][xyz]([-+][xyz])?)?)$"
237+
# Rough pattern for Hermann-Mauguin symbols: exactly one lattice letter out of
# P, I, F, A, B, C, R, followed by zero or more whitespace-separated tokens.
# A token is one of: digits, lowercase letters, digits/letters, digits/digits,
# "-" plus optional digits, or letters/m.
# NOTE(review): as written this also accepts a bare lattice letter ("P"), a lone
# "-" token ("P -") and any run of whitespace between tokens, which is looser
# than the single-space rule stated in the field description; the `\s+\d+/m`
# alternative is already covered by `\s+\d+/[a-z]+`. The same string is exported
# as the `pattern` of both H-M fields in openapi.json, so tighten it there too.
HM_SYMBOL_REGEXP = r"^(P|I|F|A|B|C|R)(\s+\d+|\s+[a-z]+|\s+\d+/[a-z]+|\s+\d+/\d+|\s+-\d*|\s+\d+/m|\s+[a-z]+/m)*$"
238+
239+
240+
# Build the anchored pattern for a symmetry-operation triple "op,op,op".
# Each op is either
#   * an optionally signed x/y/z term, optionally followed by a second signed
#     x/y/z term and then an optional signed fractional translation, or
#   * an optionally signed fractional translation, optionally followed by one
#     or two signed x/y/z terms.
# The allowed translations are 1/2, n/3, n/4 and n/6 as listed in `translation`.
# Spaces inside the f-string templates are for readability only; they are all
# removed by `.replace(" ", "")` before the final pattern is assembled.
def _generate_symmetry_operation_regex():
241+
translation = "1/2|[12]/3|[1-3]/4|[1-5]/6"
242+
translation_appended = f"[-+]? [xyz] ([-+][xyz])? ([-+] ({translation}) )?"
243+
translation_prepended = f"[-+]? ({translation}) ([-+] [xyz] ([-+][xyz])? )?"
244+
symop = f"({translation_appended}|{translation_prepended})".replace(" ", "")
245+
return f"^{symop},{symop},{symop}$"
246+
247+
248+
# Built once when the module is loaded. Expanding the helper yields the same
# pattern string as the literal SYMMETRY_OPERATION_REGEXP defined above.
# NOTE(review): two names now carry one pattern -- consider a single source of truth.
SPACE_GROUP_SYMMETRY_OPERATION_REGEX = _generate_symmetry_operation_regex()
237249

238250
EXTRA_SYMBOLS = ["X", "vacancy"]
239251

tests/adapters/structures/test_structures.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ def compare_lossy_conversion(
192192
"species",
193193
"fractional_site_positions",
194194
"space_group_symmetry_operations_xyz",
195+
"space_group_symbol_hall",
196+
"space_group_symbol_hermann_mauguin",
197+
"space_group_symbol_hermann_mauguin_extended",
198+
"space_group_it_number",
195199
)
196200
array_keys = ("cartesian_site_positions", "lattice_vectors")
197201

tests/models/test_data/test_good_structures.json

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,10 @@
191191
{"name": "P", "chemical_symbols": ["P"], "concentration": [1.0] }
192192
],
193193
"structure_features": ["site_attachments"],
194-
"space_group_symmetry_operations_xyz": ["x,y,z", "-x,y,-z", "x+1/2,y+1/2,z", "-x+1/2,y+1/2,-z"]
194+
"space_group_symmetry_operations_xyz": ["x,y,z", "-x,y,-z", "x+1/2,y+1/2,z", "-x+1/2,y+1/2,-z"],
195+
"space_group_symbol_hermann_mauguin": "R -3 m",
196+
"space_group_symbol_hermann_mauguin_extended": "R -3 m",
197+
"space_group_symbol_hall": "I 4bd 2ab 3"
195198
},
196199
{
197200
"task_id": "db/1234567",
@@ -224,6 +227,10 @@
224227
{"name": "P", "chemical_symbols": ["P"], "concentration": [1.0] }
225228
],
226229
"structure_features": ["disorder", "site_attachments"],
227-
"space_group_symmetry_operations_xyz": ["x,y,z"]
230+
"space_group_symmetry_operations_xyz": ["x,y,z"],
231+
"space_group_symbol_hall": "P 2yb (-1/2*x+z,1/2*x,y)",
232+
"space_group_symbol_hermann_mauguin": "P 1",
233+
"space_group_symbol_hermann_mauguin_extended": "P 1",
234+
"space_group_it_number": 122
228235
}
229236
]

tests/models/test_structures.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,10 @@ def test_bad_structures(
211211
{"space_group_symmetry_operations_xyz": ["xy,z"]},
212212
"String should match pattern",
213213
),
214+
(
215+
{"space_group_symbol_hermann_mauguin": "P1"},
216+
"String should match pattern",
217+
),
214218
)
215219

216220

tests/models/test_utils.py

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
1+
import re
12
from collections.abc import Callable
23

34
import pytest
45
from pydantic import BaseModel, Field, ValidationError
56

6-
from optimade.models.utils import OptimadeField, StrictField, SupportLevel
7+
from optimade.models.utils import (
8+
HM_SYMBOL_REGEXP,
9+
OptimadeField,
10+
StrictField,
11+
SupportLevel,
12+
)
713

814

915
def make_bad_models(field: Callable):
@@ -159,3 +165,58 @@ def test_anonymize_formula():
159165
assert anonymize_formula("Si1 O2") == "A2B"
160166
assert anonymize_formula("Si11 O2") == "A11B2"
161167
assert anonymize_formula("Si10 O2C4") == "A5B2C"
168+
169+
170+
VALID_HM_SYMBOLS = [
171+
"P 1", # Triclinic
172+
"P -1",
173+
"P 2", # Monoclinic
174+
"P 21",
175+
"P m",
176+
"P c",
177+
"P 2/m",
178+
"P 21/c",
179+
"P 21/n",
180+
"C 2/c",
181+
"P 2 2 2", # Orthorhombic
182+
"P 21 21 21",
183+
"P n n n",
184+
"P m m a",
185+
"F d d d",
186+
"I m m a",
187+
"P 4", # Tetragonal
188+
"P 41",
189+
"P 42",
190+
"P 43",
191+
"I 4/m m m",
192+
"P 3", # Trigonal
193+
"R 3",
194+
"P 31",
195+
"R -3 m",
196+
"P 6", # Hexagonal
197+
"P 63/m m c",
198+
"P m -3", # Cubic
199+
"F m -3 m",
200+
"I a -3 d",
201+
]
202+
203+
INVALID_HM_SYMBOLS = [
204+
"", # Empty string
205+
"p 1", # Lowercase lattice
206+
"Q 1", # Invalid lattice
207+
"P1", # No space
208+
"1 P", # Wrong order
209+
"P 2/c/m", # Invalid combination
210+
"PP 2", # Double letter
211+
"X -3 m", # Invalid lattice
212+
]
213+
214+
215+
# Every entry of VALID_HM_SYMBOLS must be accepted by HM_SYMBOL_REGEXP.
# `re.match` only anchors at the start; the end anchor comes from the pattern's own `$`.
@pytest.mark.parametrize("hm_symbol", VALID_HM_SYMBOLS)
216+
def test_hm_symbol_regexp(hm_symbol):
217+
assert re.match(HM_SYMBOL_REGEXP, hm_symbol)
218+
219+
220+
# Every entry of INVALID_HM_SYMBOLS must be rejected by HM_SYMBOL_REGEXP,
# i.e. `re.match` has to return None for it.
@pytest.mark.parametrize("hm_symbol", INVALID_HM_SYMBOLS)
221+
def test_invalid_space_groups(hm_symbol):
222+
assert re.match(HM_SYMBOL_REGEXP, hm_symbol) is None

tests/server/test_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ def test_list_properties(
509509

510510
results = cli.list_properties("structures")
511511
for database in results:
512-
assert len(results[database]) == 23, str(results[database])
512+
assert len(results[database]) == 27, str(results[database])
513513

514514
results = cli.search_property("structures", "site")
515515
for database in results:

0 commit comments

Comments
 (0)