Skip to content

Commit 0b6af28

Browse files
committed
Ensure missing values in aggregated variables are written as fill values instead of NaN
+ update the aggregated_timeseries tests to check for these issues
1 parent ef75f70 commit 0b6af28

3 files changed

Lines changed: 33 additions & 14 deletions

File tree

aodntools/timeseries_products/aggregated_timeseries.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def get_variable_values(nc, variable):
3939
Get values of the variable and its QC flags.
4040
If the variable is not present, NaN is returned and its QC flags are set to 9
4141
If the variable is present but its QC flags are not, the QC flags are set to 0
42-
:param nc: dataset
42+
:param nc: xarray dataset
4343
:param variable: name of the variable to get
4444
:return: variable values and variable qc flags
4545
"""
@@ -48,6 +48,8 @@ def get_variable_values(nc, variable):
4848

4949
if variable in file_variables:
5050
variable_values = nc[variable].values
51+
if any(np.isnan(variable_values)):
52+
variable_values = np.ma.masked_array(variable_values, mask=np.isnan(variable_values))
5153
if variable+'_quality_control' in file_variables:
5254
variableQC_values = nc[variable+'_quality_control'].values
5355
else:
@@ -317,12 +319,12 @@ def main_aggregator(files_to_agg, var_to_agg, site_code, input_dir='', output_di
317319
'site_code': site_code,
318320
'time_coverage_start': time_start,
319321
'time_coverage_end': time_end,
320-
'geospatial_vertical_min': np.min(ds['DEPTH']),
321-
'geospatial_vertical_max': np.max(ds['DEPTH']),
322-
'geospatial_lat_min': np.min(ds['LATITUDE']),
323-
'geospatial_lat_max': np.max(ds['LATITUDE']),
324-
'geospatial_lon_min': np.min(ds['LONGITUDE']),
325-
'geospatial_lon_max': np.max(ds['LONGITUDE']),
322+
'geospatial_vertical_min': np.min(ds['DEPTH'][:]),
323+
'geospatial_vertical_max': np.max(ds['DEPTH'][:]),
324+
'geospatial_lat_min': np.min(ds['LATITUDE'][:]),
325+
'geospatial_lat_max': np.max(ds['LATITUDE'][:]),
326+
'geospatial_lon_min': np.min(ds['LONGITUDE'][:]),
327+
'geospatial_lon_max': np.max(ds['LONGITUDE'][:]),
326328
'date_created': datetime.utcnow().strftime(timeformat),
327329
'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.',
328330
'keywords': ', '.join([var_to_agg, 'AGGREGATED']),

test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20200622.nc renamed to test_aodntools/timeseries_products/IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20220607.nc

199 KB
Binary file not shown.

test_aodntools/timeseries_products/test_aggregated_timeseries.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import unittest
55

6+
import numpy as np
67
from netCDF4 import Dataset, chartostring
78

89
from aodntools import __version__
@@ -19,7 +20,7 @@
1920
BAD_FILE
2021
]
2122
EXPECTED_OUTPUT_FILE = os.path.join(
22-
TEST_ROOT, 'IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20200622.nc'
23+
TEST_ROOT, 'IMOS_ANMN-NRS_TZ_20181213_NRSROT_FV01_TEMP-aggregated-timeseries_END-20190523_C-20220607.nc'
2324
)
2425

2526

@@ -69,13 +70,29 @@ def test_main_aggregator(self):
6970
self.assertIn(__version__, dataset.lineage)
7071
self.assertIn(BAD_FILE, dataset.rejected_files)
7172

72-
# check aggregated variable values
73+
compare_attrs = ('Conventions', 'feature_type', 'author', 'author_email', 'file_version',
74+
'geospatial_lat_max', 'geospatial_lat_min', 'geospatial_lon_max', 'geospatial_lon_min',
75+
'geospatial_vertical_max', 'geospatial_vertical_min', 'naming_authority', 'project',
76+
'time_coverage_start', 'time_coverage_end'
77+
)
7378
expected = Dataset(EXPECTED_OUTPUT_FILE)
74-
compare_vars = ('TIME', 'TEMP', 'TEMP_quality_control', 'NOMINAL_DEPTH', 'instrument_index')
75-
non_match_vars = [var for var in compare_vars
76-
if not all(dataset[var][:] == expected[var][:])
77-
]
78-
self.assertEqual(non_match_vars, [])
79+
for attr in compare_attrs:
80+
self.assertEqual(dataset.getncattr(attr), expected.getncattr(attr))
81+
82+
# check that there are no NaN values in any variable (they should be fill values instead)
83+
nan_vars = [name
84+
for name, var in dataset.variables.items()
85+
if var.dtype in (np.dtype('float32'), np.dtype('float64')) and any(np.isnan(var[:]))
86+
]
87+
self.assertEqual([], nan_vars)
88+
89+
# check aggregated variable values
90+
non_match_vars = []
91+
for var in set(expected.variables.keys()) - string_vars:
92+
# compare the raw data arrays (not the masked_array)
93+
if not all(dataset[var][:].data == expected[var][:].data):
94+
non_match_vars.append(var)
95+
self.assertEqual([], non_match_vars)
7996

8097
def test_source_file_attributes(self):
8198
output_file, bad_files = main_aggregator(INPUT_FILES, 'PSAL', 'NRSROT', input_dir=TEST_ROOT,

0 commit comments

Comments
 (0)