Skip to content

Commit ef67d4d

Browse files
committed
New condition for compressing data
The new keyword compress_data under the COMPRESS section of the input file specifies the data type that should be compressed. In the old version, the estimated noise level from the compression was never used.
1 parent 557da10 commit ef67d4d

3 files changed

Lines changed: 17 additions & 25 deletions

File tree

src/pipt/loop/assimilation.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -475,13 +475,10 @@ def post_process_forecast(self):
475475
vintage = 0
476476

477477
# Store according to sparse_info
478-
if vintage < len(self.ensemble.sparse_info['mask']) and \
479-
pred_data[key].shape[0] == int(np.sum(self.ensemble.sparse_info['mask'][vintage])):
480-
478+
if key == self.ensemble.sparse_info['compress_data'] and pred_data[key] is not None:
481479
# If first entry in pred_data_tmp
482480
if pred_data_tmp[i] is None:
483481
pred_data_tmp[i] = {key: pred_data[key]}
484-
485482
else:
486483
pred_data_tmp[i][key] = pred_data[key]
487484

@@ -516,19 +513,18 @@ def post_process_forecast(self):
516513
self.ensemble.data_rec = []
517514
for i in range(len(pred_data_tmp)): # INDEX
518515
if pred_data_tmp[i] is not None:
519-
for k in pred_data_tmp[i]: # DATATYPE
520-
if vintage < len(self.ensemble.sparse_info['mask']) and \
521-
len(pred_data_tmp[i][k]) == int(np.sum(self.ensemble.sparse_info['mask'][vintage])):
516+
for key in pred_data_tmp[i]: # DATATYPE
517+
if key == self.ensemble.sparse_info['compress_data']:
522518
if self.ensemble.keys_da['daalg'][1] == 'gies':
523-
self.ensemble.pred_data[i][k] = np.zeros(
524-
(len(self.ensemble.obs_data[i][k]), self.ensemble.ne+1))
519+
self.ensemble.pred_data[i][key] = np.zeros(
520+
(len(self.ensemble.obs_data[i][key]), self.ensemble.ne+1))
525521
else:
526-
self.ensemble.pred_data[i][k] = np.zeros(
527-
(len(self.ensemble.obs_data[i][k]), self.ensemble.ne))
528-
for m in range(pred_data_tmp[i][k].shape[1]):
529-
data_array = self.ensemble.compress_manager(pred_data_tmp[i][k][:, m], vintage,
522+
self.ensemble.pred_data[i][key] = np.zeros(
523+
(len(self.ensemble.obs_data[i][key]), self.ensemble.ne))
524+
for m in range(pred_data_tmp[i][key].shape[1]):
525+
data_array = self.ensemble.compress_manager(pred_data_tmp[i][key][:, m], vintage,
530526
self.ensemble.sparse_info['use_ensemble'])
531-
self.ensemble.pred_data[i][k][:, m] = data_array
527+
self.ensemble.pred_data[i][key][:, m] = data_array
532528
vintage = vintage + 1
533529
if self.ensemble.sparse_info['use_ensemble']:
534530
self.ensemble.compress_manager()

src/pipt/loop/ensemble.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -278,10 +278,8 @@ def _org_obs_data(self):
278278
load_data = np.load(truedata[i][0]) # Load the .npz file
279279
data_array = load_data[load_data.files[0]]
280280

281-
# Perform compression if required (we only and always compress signals with same size as number of active cells)
282-
if self.sparse_info is not None and \
283-
vintage < len(self.sparse_info['mask']) and \
284-
len(data_array) == int(np.sum(self.sparse_info['mask'][vintage])):
281+
# Perform compression for the data type specified in self.sparse_info['compress_data'] if required
282+
if self.sparse_info is not None and datatype == self.sparse_info['compress_data']:
285283
data_array = self.compress_manager(data_array, vintage, False)
286284
vintage = vintage + 1
287285

@@ -306,16 +304,14 @@ def _org_obs_data(self):
306304
self.obs_data[i][self.keys_da['datatype'][0]] = np.array(
307305
truedata[i][:]) # no need to make this into a list
308306
else:
309-
for j in range(len(self.keys_da['datatype'])): # DATATYPE
307+
for j, datatype in enumerate(self.keys_da['datatype']):
310308
# Load a Numpy npz file
311309
if isinstance(truedata[i][j], str) and truedata[i][j].endswith('.npz'):
312310
load_data = np.load(truedata[i][j]) # Load the .npz file
313311
data_array = load_data[load_data.files[0]]
314312

315-
# Perform compression if required (we only and always compress signals with same size as number of active cells)
316-
if self.sparse_info is not None and \
317-
vintage < len(self.sparse_info['mask']) and \
318-
len(data_array) == int(np.sum(self.sparse_info['mask'][vintage])):
313+
# Perform compression for the data type specified in self.sparse_info['compress_data'] if required
314+
if self.sparse_info is not None and datatype == self.sparse_info['compress_data']:
319315
data_array = self.compress_manager(data_array, vintage, False)
320316
vintage = vintage + 1
321317

@@ -521,8 +517,7 @@ def _org_data_var(self):
521517

522518
# Handle case when noise is estimated using wavelets
523519
if self.sparse_info is not None and self.datavar[i][datatype[j]] is not None and \
524-
vintage < len(self.sparse_info['mask']) and \
525-
len(self.datavar[i][datatype[j]]) == int(np.sum(self.sparse_info['mask'][vintage])):
520+
datatype[j]==self.sparse_info['compress_data']:
526521
# compute var from sparse_data
527522
est_noise = np.power(self.sparse_data[vintage].est_noise, 2)
528523
self.datavar[i][datatype[j]] = est_noise # override the given value

src/pipt/misc_tools/extract_tools.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ def organize_sparse_representation(info: Union[dict,list]) -> dict:
444444
sparse['mask'].append(mask.flatten())
445445

446446
# Read rest of keywords
447+
sparse['compress_data'] = info['compress_data']
447448
sparse['level'] = info['level']
448449
sparse['wname'] = info['wname']
449450
sparse['threshold_rule'] = info['threshold_rule']

0 commit comments

Comments
 (0)