Skip to content
This repository was archived by the owner on Dec 26, 2025. It is now read-only.

Commit 66855bf

Browse files
committed
Use xmltodict to read faster without validation
Added test cases
1 parent 7774658 commit 66855bf

7 files changed

Lines changed: 69 additions & 34 deletions

File tree

README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ For ADI a user definition is a dictionary of
3333
The library also supports ADX import/export as compatible as possible to the ADI part.
3434
Though it will differ in handling application and user definitions.
3535
It relies on the [ADX schemas](https://adif.org/314/ADIF_314.htm#ADX_Schemas) from adif.org.
36+
By default the ADX import does not validate the content, so that files can be read quickly.
3637

3738
Installation
3839
------------
@@ -80,4 +81,7 @@ Copyright
8081
---------
8182
PyADIF-File © 2024 by Andreas Schawo is licensed under [CC BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)
8283

83-
PyADIF-File uses xmlschema Copyright (c), 2016-2022, SISSA (Scuola Internazionale Superiore di Studi Avanzati).
84+
PyADIF-File uses
85+
* xmlschema Copyright (c), 2016-2022, SISSA (Scuola Internazionale Superiore di Studi Avanzati)
86+
* xmltodict Copyright (c), 2012 Martin Blech and individual contributors
87+

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ authors = [
77
description = "Convert ADIF ADI/ADX content to dictionary and vice versa"
88
readme = "README.md"
99
requires-python = ">=3.9"
10-
dependencies = ["xmlschema"]
10+
dependencies = ["xmlschema", "xmltodict"]
1111
classifiers = [
1212
"Development Status :: 4 - Beta",
1313
"Intended Audience :: Other Audience",

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
xmlschema
1+
xmlschema~=2.5.0
2+
xmltodict~=0.13.0

src/adif_file/adx.py

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33

44
import datetime
55
import os.path
6+
import xml
67
from xml.etree.ElementTree import ElementTree, ParseError
78

89
import xmlschema
10+
import xmltodict
911

1012
from adif_file.__version__ import __version__ as __version_str__
1113

@@ -32,40 +34,51 @@ class XmlSyntaxError(SyntaxError):
3234
pass
3335

3436

35-
def load(file_name: str) -> dict:
37+
def loads(adx_data: str, validate: bool = False) -> dict:
    """Load ADX content to dictionary

    By default the ADX is not validated to conform to the standard.

    :param adx_data: the ADX content
    :param validate: validate the ADX against the generic XSD (very slow)
    :return: the ADX as a dict; 'RECORDS' is always a list (empty if there are no records)
    :raises XmlSyntaxError: if the content is not well-formed XML
    :raises UndefinedElementException: if validation finds an undefined child element
    :raises MalformedValueException: if validation finds a malformed value
    """
    # Import the submodule explicitly: a bare "import xml" does not guarantee
    # that xml.parsers.expat has been loaded.
    from xml.parsers.expat import ExpatError

    if validate:
        try:
            ADX_IMPORT_SCHEMA.validate(adx_data)
        except ParseError as exc:
            raise XmlSyntaxError(str(exc)) from None
        except xmlschema.validators.exceptions.XMLSchemaChildrenValidationError as exc:
            raise UndefinedElementException(f'in {exc.elem.tag}') from None
        except xmlschema.validators.exceptions.XMLSchemaValidationError as exc:
            raise MalformedValueException(f'Field "{exc.elem.tag}": {exc.reason}') from None

    try:
        data_dict = xmltodict.parse(adx_data, cdata_key='$')
    except ExpatError as exc:
        raise XmlSyntaxError(str(exc)) from None

    data_dict = data_dict['ADX']

    # Flatten RECORDS/RECORD into a plain list of records
    records = data_dict.get('RECORDS')
    record = records.get('RECORD') if isinstance(records, dict) else None
    if not record:
        data_dict['RECORDS'] = []
    elif isinstance(record, list):
        data_dict['RECORDS'] = record
    else:
        # xmltodict yields a single dict (not a list) when only one RECORD exists
        data_dict['RECORDS'] = [record]

    return data_dict
67+
68+
69+
def load(file_name: str, validate: bool = False) -> dict:
3670
"""Load ADX file to dictionary
3771
The XML is only validated against the generic XSD if validate is True
3872
3973
:param file_name: the file name where the ADX data is stored
74+
:param validate: validate the ADX against the generic XSD (very slow)
4075
:return: the ADX as a dict
4176
"""
4277

43-
try:
44-
data_dict = ADX_IMPORT_SCHEMA.to_dict(file_name, decimal_type=str)
45-
46-
# Flatten records
47-
records = []
48-
for rec in data_dict['RECORDS']['RECORD']:
49-
for elem in rec:
50-
if type(rec[elem][0]) is str: # Only for str to save APP data
51-
rec[elem] = rec[elem][0]
52-
records.append(rec)
53-
data_dict['RECORDS'] = records
54-
55-
# Flatten header
56-
header = {}
57-
for elem in data_dict['HEADER']:
58-
if type(data_dict['HEADER'][elem][0]) is str: # Only for str to save USERDEF
59-
header[elem] = data_dict['HEADER'][elem][0]
60-
data_dict['HEADER'] = header
61-
except ParseError as exc:
62-
raise XmlSyntaxError(str(exc)) from None
63-
except xmlschema.validators.exceptions.XMLSchemaChildrenValidationError as exc:
64-
raise UndefinedElementException(f'in {exc.elem.tag}') from None
65-
except xmlschema.validators.exceptions.XMLSchemaValidationError as exc:
66-
raise MalformedValueException(f'Field "{exc.elem.tag}": {exc.reason}') from None
78+
with open(file_name, encoding='utf-8') as xf:
79+
adx_data = xf.read()
6780

68-
return data_dict
81+
return loads(adx_data, validate)
6982

7083

7184
def dump(file_name: str, data_dict: dict):
@@ -106,5 +119,5 @@ def dump(file_name: str, data_dict: dict):
106119
raise MalformedValueException(f'Field "{exc.elem.tag}": {exc.reason}') from None
107120

108121

109-
__all__ = ['load', 'dump', 'MissingRecordsException', 'UndefinedElementException',
122+
__all__ = ['load', 'loads', 'dump', 'MissingRecordsException', 'UndefinedElementException',
110123
'MalformedValueException', 'XmlSyntaxError']

test/test_dumpadx.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,14 @@ def test_20_dump(self):
4848
{'CALL': 'YY1YYY',
4949
'QSO_DATE': '20231204',
5050
'TIME_ON': '1200',
51-
'QTH_INTL': 'Töst'
51+
'QTH_INTL': 'Töst',
52+
'APP':
53+
{
54+
'@PROGRAMID': 'TESTAPP',
55+
'@FIELDNAME': 'TESTFIELD',
56+
'@TYPE': 'I',
57+
'$': 'Test',
58+
},
5259
}]
5360
}
5461

@@ -72,6 +79,7 @@ def test_20_dump(self):
7279
<QSO_DATE>20231204</QSO_DATE>
7380
<TIME_ON>1200</TIME_ON>
7481
<QTH_INTL>Töst</QTH_INTL>
82+
<APP PROGRAMID="TESTAPP" FIELDNAME="TESTFIELD" TYPE="I">Test</APP>
7583
</RECORD>
7684
</RECORDS>
7785
</ADX>

test/test_loadadx.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,18 @@ def test_10_goodfile(self):
2323
{'CALL': 'YY1YYY',
2424
'QSO_DATE': '20231204',
2525
'TIME_ON': '1200',
26-
'QTH_INTL': 'Töst'
26+
'QTH_INTL': 'Töst',
27+
'APP':
28+
{
29+
'@PROGRAMID': 'TESTAPP',
30+
'@FIELDNAME': 'TESTFIELD',
31+
'@TYPE': 'I',
32+
'$': 'Test',
33+
},
2734
}]
2835
}
2936

37+
self.maxDiff = None
3038
self.assertDictEqual(adx_exp_dict, adif_file.adx.load(get_file_path('testdata/goodfile.adx')))
3139

3240
def test_20_badfile(self):
@@ -35,9 +43,9 @@ def test_20_badfile(self):
3543

3644
def test_20_badxml(self):
3745
self.assertRaises(adif_file.adx.MalformedValueException, adif_file.adx.load,
38-
get_file_path('testdata/badfile1.adx'))
46+
get_file_path('testdata/badfile1.adx'), True)
3947
self.assertRaises(adif_file.adx.UndefinedElementException, adif_file.adx.load,
40-
get_file_path('testdata/badfile2.adx'))
48+
get_file_path('testdata/badfile2.adx'), True)
4149

4250

4351
if __name__ == '__main__':

test/testdata/goodfile.adx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
<QSO_DATE>20231204</QSO_DATE>
1919
<TIME_ON>1200</TIME_ON>
2020
<QTH_INTL>Töst</QTH_INTL>
21+
<APP PROGRAMID="TESTAPP" FIELDNAME="TESTFIELD" TYPE="I">Test</APP>
2122
</RECORD>
2223
</RECORDS>
2324
</ADX>

0 commit comments

Comments
 (0)