Skip to content

Commit 899934f

Browse files
committed
Merge branch 'ingest-transform-config' into develop
2 parents 27227d6 + 6b521f5 commit 899934f

4 files changed

Lines changed: 156 additions & 14 deletions

File tree

CHANGES.rst

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ Changelog
1010
New features
1111
------------
1212

13-
+ `#160`_, `#161`_: Add class attributes to
13+
+ `#160`_, `#161`_, `#163`_: Add class attributes to
1414
:class:`icat.ingest.IngestReader` to make some prescribed values in
1515
the transformation to ICAT data file format configurable.
1616

@@ -22,6 +22,7 @@ Bug fixes and minor changes
2222
.. _#160: https://github.com/icatproject/python-icat/issues/160
2323
.. _#161: https://github.com/icatproject/python-icat/pull/161
2424
.. _#162: https://github.com/icatproject/python-icat/pull/162
25+
.. _#163: https://github.com/icatproject/python-icat/pull/163
2526

2627

2728
.. _changes-1_4_0:

src/icat/ingest.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,11 @@ class IngestReader(XMLDumpFileReader):
9797
Dataset_complete = "false"
9898
"""Value to prescribe in the `complete` attribute of datasets.
9999
100+
.. note::
101+
The value for this class attribute is subject to change in
102+
version 2.0. You might want to override it in order to pin it
103+
to a value that is suitable for you.
104+
100105
.. versionadded:: 1.5.0
101106
"""
102107
DatasetType_name = "raw"
@@ -198,6 +203,20 @@ def get_environment(self, client):
198203
Subclasses may override this method to control the attributes
199204
set in the environment.
200205
206+
.. note::
207+
If you override this method, it is advisable to call the
208+
inherited method from the parent class and augment the
209+
result. This avoids inadvertently dropping environment
210+
settings added in future versions. E.g. do something
211+
like the following in your subclass:
212+
213+
.. code-block:: python
214+
215+
def get_environment(self, client):
216+
env = super().get_environment(client)
217+
env['mykey'] = 'value'
218+
return env
219+
201220
:param client: the client object being used by this
202221
IngestReader.
203222
:type client: :class:`icat.client.Client`

tests/data/ingest-env.xslt

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,9 @@
1818
<apiversion>
1919
<xsl:copy-of select="string(/icatingest/_environment/@icat_version)"/>
2020
</apiversion>
21-
<generator>ingest-env.xslt</generator>
21+
<generator>
22+
<xsl:copy-of select="string(/icatingest/_environment/@generator)"/>
23+
</generator>
2224
</head>
2325
</xsl:template>
2426

tests/test_06_ingest.py

Lines changed: 132 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -69,10 +69,17 @@ def schemadir(monkeypatch):
6969
monkeypatch.setattr(IngestReader, "SchemaDir", testdatadir)
7070

7171

72-
class CapturingIngestReader(IngestReader):
73-
"""Modified version of Ingest reader that captures ingest_data in
74-
add_environment().
72+
class EnvironmentIngestReader(IngestReader):
73+
"""Modified version of IngestReader
74+
- Allow custom environment settings to be included.
75+
- Capture the ingest data after injection of the environment in an
76+
attribute.
7577
"""
78+
_add_env = dict()
79+
def get_environment(self, client):
80+
env = super().get_environment(client)
81+
env.update(self._add_env)
82+
return env
7683
def add_environment(self, client, ingest_data):
7784
super().add_environment(client, ingest_data)
7885
self._ingest_data = ingest_data
@@ -104,6 +111,10 @@ class MyIngestReader(IngestReader):
104111
metadata = gettestdata("metadata-4.4-inl.xml"),
105112
checks = {
106113
"testingest_inl_1": [
114+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
115+
False),
116+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
117+
"raw"),
107118
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
108119
"Dy01Cp02 at 2.7 K"),
109120
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -120,6 +131,10 @@ class MyIngestReader(IngestReader):
120131
2.74103),
121132
],
122133
"testingest_inl_2": [
134+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
135+
False),
136+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
137+
"raw"),
123138
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
124139
"Dy01Cp02 at 5.1 K"),
125140
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -143,6 +158,10 @@ class MyIngestReader(IngestReader):
143158
metadata = gettestdata("metadata-5.0-inl.xml"),
144159
checks = {
145160
"testingest_inl5_1": [
161+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
162+
False),
163+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
164+
"raw"),
146165
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
147166
"Dy01Cp02 at 2.7 K"),
148167
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -167,6 +186,10 @@ class MyIngestReader(IngestReader):
167186
2.74103),
168187
],
169188
"testingest_inl5_2": [
189+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
190+
False),
191+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
192+
"raw"),
170193
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
171194
"Dy01Cp02 at 5.1 K"),
172195
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -201,6 +224,10 @@ class MyIngestReader(IngestReader):
201224
metadata = gettestdata("metadata-4.4-sep.xml"),
202225
checks = {
203226
"testingest_sep_1": [
227+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
228+
False),
229+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
230+
"raw"),
204231
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
205232
"Dy01Cp02 at 2.7 K"),
206233
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -217,6 +244,10 @@ class MyIngestReader(IngestReader):
217244
2.74103),
218245
],
219246
"testingest_sep_2": [
247+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
248+
False),
249+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
250+
"raw"),
220251
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
221252
"Dy01Cp02 at 5.1 K"),
222253
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -240,6 +271,10 @@ class MyIngestReader(IngestReader):
240271
metadata = gettestdata("metadata-5.0-sep.xml"),
241272
checks = {
242273
"testingest_sep5_1": [
274+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
275+
False),
276+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
277+
"raw"),
243278
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
244279
"Dy01Cp02 at 2.7 K"),
245280
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -264,6 +299,10 @@ class MyIngestReader(IngestReader):
264299
2.74103),
265300
],
266301
"testingest_sep5_2": [
302+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
303+
False),
304+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
305+
"raw"),
267306
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
268307
"Dy01Cp02 at 5.1 K"),
269308
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -299,6 +338,10 @@ class MyIngestReader(IngestReader):
299338
metadata = gettestdata("metadata-sample.xml"),
300339
checks = {
301340
"testingest_sample_1": [
341+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
342+
False),
343+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
344+
"raw"),
302345
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
303346
"ab3465 at 2.7 K"),
304347
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -313,6 +356,10 @@ class MyIngestReader(IngestReader):
313356
"ab3465"),
314357
],
315358
"testingest_sample_2": [
359+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
360+
False),
361+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
362+
"raw"),
316363
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
317364
"ab3465 at 5.1 K"),
318365
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -327,6 +374,10 @@ class MyIngestReader(IngestReader):
327374
"ab3465"),
328375
],
329376
"testingest_sample_3": [
377+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
378+
False),
379+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
380+
"raw"),
330381
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
331382
"ab3466 at 2.7 K"),
332383
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -341,6 +392,10 @@ class MyIngestReader(IngestReader):
341392
"ab3466"),
342393
],
343394
"testingest_sample_4": [
395+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
396+
False),
397+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
398+
"raw"),
344399
("SELECT ds.description FROM Dataset ds WHERE ds.id = %d",
345400
"reference"),
346401
("SELECT ds.startDate FROM Dataset ds WHERE ds.id = %d",
@@ -366,7 +421,7 @@ def test_ingest_schema(client, investigation, schemadir, case):
366421
datasets = []
367422
for name in case.data:
368423
datasets.append(client.new("Dataset", name=name))
369-
reader = CapturingIngestReader(client, case.metadata, investigation)
424+
reader = EnvironmentIngestReader(client, case.metadata, investigation)
370425
print_xml(reader._ingest_data)
371426
print_xml(reader.infile)
372427
with get_icatdata_schema().open("rb") as f:
@@ -644,6 +699,63 @@ def test_ingest_error_searcherr(client, investigation, schemadir, case):
644699
logger.info("Raised %s: %s", exc.type.__name__, exc.value)
645700

646701

702+
classattr_metadata = NamedBytesIO("""<?xml version='1.0' encoding='UTF-8'?>
703+
<icatingest version="1.1">
704+
<head>
705+
<date>2024-10-11T10:51:26+02:00</date>
706+
<generator>metadata-writer 0.27a</generator>
707+
</head>
708+
<data>
709+
<dataset id="Dataset_1">
710+
<name>testingest_classattr_1</name>
711+
<description>Auxiliary data</description>
712+
<startDate>2022-02-03T15:40:12+01:00</startDate>
713+
<endDate>2022-02-03T17:04:22+01:00</endDate>
714+
</dataset>
715+
</data>
716+
</icatingest>
717+
""".encode("utf8"), "classattr_metadata")
718+
classattr_cases = [
719+
Case(
720+
data = ["testingest_classattr_1"],
721+
metadata = classattr_metadata,
722+
checks = {
723+
"testingest_classattr_1": [
724+
("SELECT ds.complete FROM Dataset ds WHERE ds.id = %d",
725+
True),
726+
("SELECT ds.type.name FROM Dataset ds WHERE ds.id = %d",
727+
"other"),
728+
],
729+
},
730+
marks = (),
731+
),
732+
]
733+
@pytest.mark.parametrize("case", [
734+
pytest.param(c, id=c.metadata.name, marks=c.marks) for c in classattr_cases
735+
])
736+
def test_ingest_classattr(monkeypatch, client, investigation, schemadir, case):
737+
"""Test overriding prescribed values set in IngestReader class attributes.
738+
"""
739+
monkeypatch.setattr(IngestReader, "Dataset_complete", "true")
740+
monkeypatch.setattr(IngestReader, "DatasetType_name", "other")
741+
datasets = []
742+
for name in case.data:
743+
datasets.append(client.new("Dataset", name=name))
744+
reader = IngestReader(client, case.metadata, investigation)
745+
reader.ingest(datasets, dry_run=True, update_ds=True)
746+
for ds in datasets:
747+
ds.create()
748+
reader.ingest(datasets)
749+
for name in case.checks.keys():
750+
query = Query(client, "Dataset", conditions={
751+
"name": "= '%s'" % name,
752+
"investigation.id": "= %d" % investigation.id,
753+
})
754+
ds = client.assertedSearch(query)[0]
755+
for query, res in case.checks[name]:
756+
assert client.assertedSearch(query % ds.id)[0] == res
757+
758+
647759
customcases = [
648760
Case(
649761
data = ["testingest_custom_icatingest_1"],
@@ -734,22 +846,30 @@ def test_custom_ingest(client, investigation, samples, schemadir, case):
734846
def test_ingest_env(monkeypatch, client, investigation, schemadir, case):
735847
"""Test using the _environment element.
736848
737-
Applying a custom XSLT that extracts an attribute from the
738-
_environment element that is injected by IngestReader into the
739-
input data and puts that values into the head element of the
740-
transformed input. This is to test that adding the _environment
741-
element works and it is in principle possible to make use of the
742-
values in the XSLT.
849+
Add a custom attribute to the _environment that is injected by
850+
IngestReader into the input data. Apply a custom XSLT that
851+
extracts attributes from the _environment element and puts the
852+
values into the head element of the transformed input. This is to
853+
test that adding the _environment element works and it is in
854+
principle possible to make use of the values in the XSLT.
743855
"""
744-
monkeypatch.setattr(IngestReader,
856+
generator = "test_ingest_env (python-icat %s)" % icat.__version__
857+
monkeypatch.setattr(EnvironmentIngestReader,
858+
"_add_env", dict(generator=generator))
859+
monkeypatch.setattr(EnvironmentIngestReader,
745860
"XSLT_Map", dict(icatingest="ingest-env.xslt"))
746861
datasets = []
747862
for name in case.data:
748863
datasets.append(client.new("Dataset", name=name))
749-
reader = IngestReader(client, case.metadata, investigation)
864+
reader = EnvironmentIngestReader(client, case.metadata, investigation)
865+
print_xml(reader._ingest_data)
866+
print_xml(reader.infile)
750867
with get_icatdata_schema().open("rb") as f:
751868
schema = etree.XMLSchema(etree.parse(f))
752869
schema.assertValid(reader.infile)
753870
version_elem = reader.infile.xpath("/icatdata/head/apiversion")
754871
assert version_elem
755872
assert version_elem[0].text == str(client.apiversion)
873+
generator_elem = reader.infile.xpath("/icatdata/head/generator")
874+
assert generator_elem
875+
assert generator_elem[0].text == generator

0 commit comments

Comments
 (0)