Skip to content

Commit 537d711

Browse files
authored
Merge pull request #930 from davidhassell/persist-environment
Persist data after computation
2 parents bb71b6a + 021fc03 commit 537d711

5 files changed

Lines changed: 87 additions & 19 deletions

File tree

Changelog.rst

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@ Version NEXTVERSION
33

44
**2026-??-??**
55

6+
* New keyword parameter to `cf.Data.compute`: ``persist``
7+
(https://github.com/NCAS-CMS/cf-python/issues/929)
8+
* New function to control the persistence of computed data:
9+
`cf.persist_data` (https://github.com/NCAS-CMS/cf-python/issues/929)
610
* New default backend for netCDF-4 in `cf.write`: ``h5netcdf-h5py``,
711
that allows control of the internal file metadata via the new
812
``h5py_options`` parameter

cf/functions.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,7 @@ def configuration(
157157
chunksize=None,
158158
log_level=None,
159159
display_data=None,
160+
persist_data=None,
160161
regrid_logging=None,
161162
relaxed_identities=None,
162163
bounds_combination_mode=None,
@@ -179,6 +180,7 @@ def configuration(
179180
* `chunksize`
180181
* `log_level`
181182
* `display_data`
183+
* `persist_data`
182184
* `regrid_logging`
183185
* `relaxed_identities`
184186
* `bounds_combination_mode`
@@ -203,9 +205,10 @@ def configuration(
203205
204206
.. seealso:: `atol`, `rtol`, `tempdir`, `chunksize`,
205207
`total_memory`, `log_level`, `display_data`,
206-
`regrid_logging`, `relaxed_identities`,
207-
`bounds_combination_mode`, `active_storage`,
208-
`active_storage_url`, `active_storage_max_requests`
208+
`persist_data`, `regrid_logging`,
209+
`relaxed_identities`, `bounds_combination_mode`,
210+
`active_storage`, `active_storage_url`,
211+
`active_storage_max_requests`
209212
210213
:Parameters:
211214
@@ -247,12 +250,18 @@ def configuration(
247250
* ``'DETAIL'`` (``3``);
248251
* ``'DEBUG'`` (``-1``).
249252
250-
display_data `bool` or `Constant`, optional
253+
display_data: `bool` or `Constant`, optional
251254
The new display data option. The default is to not change
252255
the current behaviour.
253256
254257
.. versionadded:: 3.19.0
255258
259+
persist_data: `bool` or `Constant`, optional
260+
The new persist data option. The default is to not change
261+
the current behaviour.
262+
263+
.. versionadded:: NEXTVERSION
264+
256265
regrid_logging: `bool` or `Constant`, optional
257266
The new value (either True to enable logging or False to
258267
disable it). The default is to not change the current
@@ -312,6 +321,7 @@ def configuration(
312321
'bounds_combination_mode': 'AND',
313322
'chunksize': 82873466.88000001,
314323
'display_data': True,
324+
'persist_data': False,
315325
'active_storage': False,
316326
'active_storage_url': None,
317327
'active_storage_max_requests': 100}
@@ -330,6 +340,7 @@ def configuration(
330340
'bounds_combination_mode': 'AND',
331341
'chunksize': 75000000.0,
332342
'display_data': True,
343+
'persist_data': False,
333344
'active_storage': False,
334345
'active_storage_url': None,
335346
'active_storage_max_requests': 100}
@@ -358,6 +369,7 @@ def configuration(
358369
'bounds_combination_mode': 'AND',
359370
'chunksize': 75000000.0,
360371
'display_data': True,
372+
'persist_data': False,
361373
'active_storage': False,
362374
'active_storage_url': None}
363375
>>> with cf.configuration(atol=9, rtol=10):
@@ -372,6 +384,7 @@ def configuration(
372384
'bounds_combination_mode': 'AND',
373385
'chunksize': 75000000.0,
374386
'display_data': True,
387+
'persist_data': False,
375388
'active_storage': False,
376389
'active_storage_url': None,
377390
'active_storage_max_requests': 100}
@@ -385,6 +398,7 @@ def configuration(
385398
'bounds_combination_mode': 'AND',
386399
'chunksize': 75000000.0,
387400
'display_data': True,
401+
'persist_data': False,
388402
'active_storage': False,
389403
'active_storage_url': None,
390404
'active_storage_max_requests': 100}
@@ -416,6 +430,7 @@ def configuration(
416430
new_chunksize=chunksize,
417431
new_log_level=log_level,
418432
new_display_data=display_data,
433+
new_persist_data=persist_data,
419434
new_regrid_logging=regrid_logging,
420435
new_relaxed_identities=relaxed_identities,
421436
bounds_combination_mode=bounds_combination_mode,
@@ -460,6 +475,7 @@ def _configuration(_Configuration, **kwargs):
460475
"new_chunksize": chunksize,
461476
"new_log_level": log_level,
462477
"new_display_data": display_data,
478+
"new_persist_data": persist_data,
463479
"new_regrid_logging": regrid_logging,
464480
"new_relaxed_identities": relaxed_identities,
465481
"bounds_combination_mode": bounds_combination_mode,
@@ -590,6 +606,10 @@ class display_data(ConstantAccess, cfdm.display_data):
590606
pass
591607

592608

609+
class persist_data(ConstantAccess, cfdm.persist_data):
610+
pass
611+
612+
593613
class regrid_logging(ConstantAccess):
594614
"""Whether or not to enable `esmpy` regridding logging.
595615

cf/test/test_Data.py

Lines changed: 55 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@
1717

1818
faulthandler.enable() # to debug seg faults and timeouts
1919

20+
import cfdm
21+
2022
import cf
2123

2224
n_tmpfiles = 2
@@ -3050,8 +3052,6 @@ def test_Data_where(self):
30503052

30513053
def test_Data__init__compression(self):
30523054
"""Test Data initialised from compressed data sources."""
3053-
import cfdm
3054-
30553055
# Ragged
30563056
for f in cfdm.read("DSG_timeSeries_contiguous.nc"):
30573057
f = f.data
@@ -3200,6 +3200,59 @@ def test_Data_compute(self):
32003200
d.compute()
32013201
self.assertEqual(d.get_cached_elements(), {0: 1, 1: 2, -1: 2})
32023202

3203+
# Persist
3204+
f = cf.read(self.filename, dask_chunks=3)[0]
3205+
d0 = f.data
3206+
npartitions = d0.npartitions
3207+
self.assertGreater(npartitions, 1)
3208+
3209+
with cf.persist_data(False):
3210+
d = d0.copy()
3211+
a = d.compute()
3212+
self.assertEqual(len(d.get_filenames()), 1)
3213+
b = d.compute()
3214+
self.assertEqual(len(d.get_filenames()), 1)
3215+
self.assertTrue(np.allclose(a, b))
3216+
3217+
d = d0.copy()
3218+
a = d.compute()
3219+
self.assertEqual(len(d.get_filenames()), 1)
3220+
b = d.compute(persist=None)
3221+
self.assertEqual(len(d.get_filenames()), 1)
3222+
self.assertTrue(np.allclose(a, b))
3223+
3224+
d = d0.copy()
3225+
a = d.compute()
3226+
self.assertEqual(len(d.get_filenames()), 1)
3227+
b = d.compute(persist=False)
3228+
self.assertEqual(len(d.get_filenames()), 1)
3229+
self.assertTrue(np.allclose(a, b))
3230+
3231+
d = d0.copy()
3232+
a = d.compute()
3233+
self.assertEqual(len(d.get_filenames()), 1)
3234+
b = d.compute(persist=True)
3235+
self.assertEqual(len(d.get_filenames()), 0)
3236+
self.assertEqual(d.npartitions, npartitions)
3237+
self.assertTrue(np.allclose(a, b))
3238+
3239+
with cf.persist_data(True):
3240+
d = d0.copy()
3241+
d.compute()
3242+
self.assertEqual(len(d.get_filenames()), 0)
3243+
3244+
d = d0.copy()
3245+
d.compute(persist=None)
3246+
self.assertEqual(len(d.get_filenames()), 0)
3247+
3248+
d = d0.copy()
3249+
d.compute(persist=False)
3250+
self.assertEqual(len(d.get_filenames()), 1)
3251+
3252+
d = d0.copy()
3253+
d.compute(persist=True)
3254+
self.assertEqual(len(d.get_filenames()), 0)
3255+
32033256
def test_Data_persist(self):
32043257
"""Test Data.persist."""
32053258
d = cf.Data(9, "km")
@@ -4149,8 +4202,6 @@ def test_Data_masked_invalid(self):
41494202

41504203
def test_Data_uncompress(self):
41514204
"""Test the `uncompress` Data method."""
4152-
import cfdm
4153-
41544205
f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
41554206
a = f.data.array
41564207
d = cf.Data(cf.RaggedContiguousArray(source=f.data.source()))
@@ -4274,8 +4325,6 @@ def test_Data_soften_mask(self):
42744325

42754326
def test_Data_compressed_array(self):
42764327
"""Test the `compressed_array` Data property."""
4277-
import cfdm
4278-
42794328
f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
42804329
f = f.data
42814330
d = cf.Data(cf.RaggedContiguousArray(source=f.source()))
@@ -4305,8 +4354,6 @@ def test_Data_fits_in_memory(self):
43054354

43064355
def test_Data_get_compressed(self):
43074356
"""Test the Data methods which get compression properties."""
4308-
import cfdm
4309-
43104357
# Compressed
43114358
f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
43124359
f = f.data
@@ -4365,8 +4412,6 @@ def test_Data_get_data(self):
43654412

43664413
def test_Data_get_count(self):
43674414
"""Test the `get_count` Data method."""
4368-
import cfdm
4369-
43704415
f = cfdm.read("DSG_timeSeries_contiguous.nc")[0]
43714416
f = f.data
43724417
d = cf.Data(cf.RaggedContiguousArray(source=f.source()))
@@ -4378,8 +4423,6 @@ def test_Data_get_count(self):
43784423

43794424
def test_Data_get_index(self):
43804425
"""Test the `get_index` Data method."""
4381-
import cfdm
4382-
43834426
f = cfdm.read("DSG_timeSeries_indexed.nc")[0]
43844427
f = f.data
43854428
d = cf.Data(cf.RaggedIndexedArray(source=f.source()))
@@ -4391,8 +4434,6 @@ def test_Data_get_index(self):
43914434

43924435
def test_Data_get_list(self):
43934436
"""Test the `get_list` Data method."""
4394-
import cfdm
4395-
43964437
f = cfdm.read("gathered.nc")[0]
43974438
f = f.data
43984439
d = cf.Data(cf.GatheredArray(source=f.source()))

cf/test/test_functions.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def test_configuration(self):
5555
self.assertIsInstance(org, dict)
5656

5757
# Check all keys that should be there are, with correct value type:
58-
self.assertEqual(len(org), 12) # update expected len if add new key(s)
58+
self.assertEqual(len(org), 13) # update expected len if add new key(s)
5959

6060
# Types expected:
6161
self.assertIsInstance(org["atol"], float)
@@ -71,6 +71,7 @@ def test_configuration(self):
7171
# equiv. string
7272
self.assertIsInstance(org["log_level"], str)
7373
self.assertIsInstance(org["display_data"], bool)
74+
self.assertIsInstance(org["persist_data"], bool)
7475

7576
# Store some sensible values to reset items to for testing, ensuring:
7677
# 1) they are kept different to the defaults (i.e. org values); and

docs/source/function.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,8 @@ Resource management
135135

136136
cf.configuration
137137
cf.chunksize
138+
cf.display_data
139+
cf.persist_data
138140
cf.free_memory
139141
cf.regrid_logging
140142
cf.tempdir

0 commit comments

Comments
 (0)