Skip to content

Commit a0327f9

Browse files
author
Matt Bertrand
committed
Processor for Hadley Centre gridded daily temperature datasets 1950-2014
1 parent f5ce843 commit a0327f9

9 files changed

Lines changed: 496 additions & 29 deletions

File tree

dataqs/hadghcnd/__init__.py

Whitespace-only changes.

dataqs/hadghcnd/hadghcnd.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
###############################################################################
5+
# Copyright Kitware Inc. and Epidemico Inc.
6+
#
7+
# Licensed under the Apache License, Version 2.0 ( the "License" );
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
###############################################################################
19+
20+
from __future__ import absolute_import
21+
import logging
22+
import os
23+
import re
24+
import shutil
25+
from datetime import date
26+
from time import sleep
27+
import gdal
28+
from dateutil.relativedelta import relativedelta
29+
from dataqs.processor_base import GeoDataMosaicProcessor, GS_DATA_DIR, \
30+
GS_TMP_DIR, RSYNC_WAIT_TIME
31+
from dataqs.helpers import gdal_translate, style_exists, create_band_vrt, untar
32+
33+
logger = logging.getLogger("dataqs.processors")
34+
script_dir = os.path.dirname(os.path.realpath(__file__))
35+
36+
37+
class HadGHCNDProcessor(GeoDataMosaicProcessor):
38+
"""
39+
Hadley Centre gridded daily temperature dataset based upon near-surface
40+
maximum (TX) and minimum (TN) temperature observations.
41+
It is designed primarily for the analysis of climate extremes and also for
42+
climate model evaluation. It spans the years 1950 to present and is
43+
available on a 2.5° latitude by 3.75° longitude grid. This dataset has
44+
been developed in collaboration with the United States National Centers for
45+
Environmental Information (NCEI), formerly the National Climatic Data Center
46+
(NCDC).
47+
More info at http://www.metoffice.gov.uk/hadobs/hadghcnd/index.html
48+
"""
49+
prefix = "HadGHCND"
50+
tmp_dir = os.path.join(GS_TMP_DIR, prefix)
51+
base_url = "http://www.metoffice.gov.uk/hadobs/hadghcnd/data/"
52+
53+
layers = {
54+
# 'HadGHCND_TXTN_anoms_1950-2014_15052015.nc.tgz': {
55+
# 'title': 'HadGHCND Temperature Anomalies - {measure}, {interval}',
56+
# 'name': '{prefix}_anomalies_{measure}_{interval}'
57+
# },
58+
'HadGHCND_TXTN_acts_1950-2014_15102015.nc.tgz': {
59+
'title': 'HadGHCND Actual Temperatures - {measure}, {interval}',
60+
'name': '{prefix}_temperatures_{measure}_{interval}'
61+
}
62+
63+
}
64+
abstract = (
65+
u"HadGHCND is a gridded daily temperature dataset based upon near-"
66+
u"surface maximum (TX) and minimum (TN) temperature observations. It is"
67+
u" designed primarily for the analysis of climate extremes and also for"
68+
u" climate model evaluation. It spans the years 1950 to present and is "
69+
u"available on a 2.5° latitude by 3.75° longitude grid. This dataset "
70+
u"has been developed in collaboration with the United States National "
71+
u"Centers for Environmental Information (NCEI), formerly the National "
72+
u"Climatic Data Center (NCDC).\n\nHadGHCND has been created using daily"
73+
u" station observations in NCEI's GHCN (Global Historical Climatology "
74+
u"Network)-Daily database. This consists of over 27,000 stations with "
75+
u"temperature observations, though many of these have quite short "
76+
u"records, or gaps in the record. Quality control has been carried out "
77+
u"to indicate potentially spurious values. We have filtered down these "
78+
u"stations to obtain those for which we can adequately calculate a 1961"
79+
u"-90 daily climatology. The dataset represents anomalies from the 1961"
80+
u"-1990 climatology.\n\nAn angular-distance weighting approach was used"
81+
u" to interpolate the station data onto the required grid. We chose to "
82+
u"grid the station anomalies to overcome some of the issues associated "
83+
u"with elevation dependence.\n\nThe data are available as gridded "
84+
u"anomalies, relative to the 1961-90 base period, and also as gridded "
85+
u"actual temperatures. The actual temperatures were created by gridding"
86+
u" the daily normals and adding these to the relevant daily anomaly.\n"
87+
u"\nSource: http://www.metoffice.gov.uk/hadobs/hadghcnd/index.html\n\n"
88+
u"Raw data file: {}")
89+
90+
def extract_band(self, ncfile, band, outname, projection=None):
91+
"""
92+
Extract specified band from NetCDF file and convert to GeoTIFF,
93+
using a VRT file to swap E-W axis
94+
:param ncfile: NetCDF input filename, formatted for use in GDAL
95+
:param band: Band number to process
96+
:param outname: Output GeoTIFF filename
97+
:param projection: Projection to use
98+
:return: Full pathname of output GeoTIFF
99+
"""
100+
temp_vrt = os.path.join(self.tmp_dir, outname + '.vrt')
101+
try:
102+
source_xml = """
103+
<root>
104+
<SimpleSource>
105+
<SourceFilename relativeToVRT="1">{fname}</SourceFilename>
106+
<SourceBand>{band}</SourceBand>
107+
<SrcRect xOff="48" yOff="0" xSize="96" ySize="73"/>
108+
<DstRect xOff="0" yOff="0" xSize="96" ySize="73"/>
109+
</SimpleSource>
110+
<SimpleSource>
111+
<SourceFilename relativeToVRT="1">{fname}</SourceFilename>
112+
<SourceBand>{band}</SourceBand>
113+
<SrcRect xOff="0" yOff="0" xSize="96" ySize="73"/>
114+
<DstRect xOff="48" yOff="0" xSize="96" ySize="73"/>
115+
</SimpleSource>
116+
</root>
117+
""".format(fname=ncfile, band=band)
118+
geotransform = ','.join(['-1.818750000000000000e+02',
119+
'3.7500000000000000e+00',
120+
'0.0000000000000000e+00',
121+
'9.1250000000000000e+01',
122+
'0.0000000000000000e+00',
123+
'-2.5000000000000000e+00'])
124+
create_band_vrt(ncfile, temp_vrt, [band], source_xml,
125+
projection=projection, geotransform=geotransform)
126+
gdal_translate(temp_vrt,
127+
os.path.join(self.tmp_dir, outname),
128+
projection='EPSG:4326',
129+
options=['TILED=YES', 'COMPRESS=LZW'])
130+
finally:
131+
if os.path.exists(temp_vrt):
132+
os.remove(temp_vrt)
133+
134+
return os.path.join(self.tmp_dir, outname)
135+
136+
def get_date(self, days):
137+
"""
138+
Calculate the date from the NetCDF band time value in days since 0/0/00
139+
:param days: Days since 00/00/00
140+
:return: Python date object
141+
"""
142+
start = date(1, 1, 1)
143+
return start + relativedelta(days=days-2) + relativedelta(years=-1)
144+
145+
def run(self):
146+
"""
147+
Retrieve and process the HadGHCND climate data.
148+
"""
149+
for key in self.layers.keys():
150+
src = os.path.join(self.base_url, key)
151+
cdf_files = untar(src, self.tmp_dir)
152+
for cdf in cdf_files:
153+
interval = re.findall('\d{4}-\d{4}',
154+
os.path.basename(cdf))[0]
155+
for measure in ('tmin', 'tmax'):
156+
ncds_gdal_name = 'NETCDF:{}:{}'.format(cdf, measure)
157+
ncds = gdal.Open(ncds_gdal_name)
158+
bands = ncds.RasterCount
159+
layer_name = self.layers[key]['name'].format(
160+
prefix=self.prefix, measure=measure, interval=interval
161+
)
162+
img_list = self.get_mosaic_filenames(layer_name)
163+
files = []
164+
for band in range(1, bands + 1):
165+
days = int(ncds.GetRasterBand(band)
166+
.GetMetadata()['NETCDF_DIM_time'])
167+
band_date = re.sub('[\-\.]+', '',
168+
self.get_date(days).isoformat())
169+
img_name = '{}_{}T000000000Z.tif'.format(layer_name,
170+
band_date)
171+
if img_name not in img_list:
172+
band_tif = self.extract_band(ncds_gdal_name,
173+
band,
174+
img_name,
175+
projection='WGS84')
176+
dst_file = self.data_dir.format(gsd=GS_DATA_DIR,
177+
ws=self.workspace,
178+
layer=layer_name,
179+
file=img_name)
180+
dst_dir = os.path.dirname(dst_file)
181+
if not os.path.exists(dst_dir):
182+
os.makedirs(dst_dir)
183+
shutil.move(band_tif, dst_file)
184+
files.append(dst_file)
185+
sleep(RSYNC_WAIT_TIME * 2)
186+
for file in files:
187+
self.post_geoserver(file, layer_name, sleeptime=0)
188+
style = '_'.join(layer_name.split('_')[0:2])
189+
if not style_exists(layer_name):
190+
with open(os.path.join(script_dir,
191+
'resources/{}.sld'.format(
192+
style))) as sld:
193+
self.set_default_style(layer_name,
194+
layer_name,
195+
sld.read())
196+
title = self.layers[key]['title'].format(
197+
measure=measure, interval=interval
198+
)
199+
self.update_geonode(layer_name,
200+
title=title,
201+
description=self.abstract.format(src),
202+
store=layer_name,
203+
bounds=('-180.0', '180.0',
204+
'-90.0', '90.0',
205+
'EPSG:4326'))
206+
self.truncate_gs_cache(layer_name)
207+
self.cleanup()
208+
209+
210+
if __name__ == '__main__':
211+
processor = HadGHCNDProcessor()
212+
processor.run()
Binary file not shown.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?xml version="1.0" encoding="UTF-8"?><sld:StyledLayerDescriptor xmlns="http://www.opengis.net/sld" xmlns:sld="http://www.opengis.net/sld" xmlns:ogc="http://www.opengis.net/ogc" xmlns:gml="http://www.opengis.net/gml" version="1.0.0">
2+
<sld:NamedLayer>
3+
<sld:Name>gistemp1200_ersstv4</sld:Name>
4+
<sld:UserStyle>
5+
<sld:Name>gistemp1200_ersstv4</sld:Name>
6+
<sld:Title>gistemp1200_ersstv4</sld:Title>
7+
<sld:FeatureTypeStyle>
8+
<sld:Name>name</sld:Name>
9+
<sld:Rule>
10+
<sld:RasterSymbolizer>
11+
<Opacity>1.0</Opacity>
12+
<ColorMap extended="true">
13+
<sld:ColorMapEntry color="#2b83ba" label="&lt;= -15.0 °C" opacity="1.0" quantity="-15.0"/>
14+
<sld:ColorMapEntry color="#6bb0af" label="-10.0 °C" opacity="1.0" quantity="-10.0"/>
15+
<sld:ColorMapEntry color="#abdda4" label="-5.0 °C" opacity="1.0" quantity="-5.0"/>
16+
<sld:ColorMapEntry color="#d5eeb1" label="-2.5 °C" opacity="1.0" quantity="-2.5"/>
17+
<sld:ColorMapEntry color="#ffffbf" label="0 °C" opacity="1.0" quantity="0"/>
18+
<sld:ColorMapEntry color="#fed690" label="2.5 °C" opacity="1.0" quantity="2.5"/>
19+
<sld:ColorMapEntry color="#fdae61" label="5.0 °C" opacity="1.0" quantity="5.0"/>
20+
<sld:ColorMapEntry color="#ea633e" label="10.0 °C" opacity="1.0" quantity="10.0"/>
21+
<sld:ColorMapEntry color="#d7191c" label="&gt;= 15 °C" opacity="1.0" quantity="15.0"/>
22+
</ColorMap>
23+
</sld:RasterSymbolizer>
24+
</sld:Rule>
25+
</sld:FeatureTypeStyle>
26+
</sld:UserStyle>
27+
</sld:NamedLayer>
28+
</sld:StyledLayerDescriptor>
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?xml version="1.0" encoding="UTF-8"?><sld:StyledLayerDescriptor xmlns="http://www.opengis.net/sld" xmlns:sld="http://www.opengis.net/sld" xmlns:ogc="http://www.opengis.net/ogc" xmlns:gml="http://www.opengis.net/gml" version="1.0.0">
2+
<sld:NamedLayer>
3+
<sld:Name>gistemp1200_ersstv4</sld:Name>
4+
<sld:UserStyle>
5+
<sld:Name>gistemp1200_ersstv4</sld:Name>
6+
<sld:Title>gistemp1200_ersstv4</sld:Title>
7+
<sld:FeatureTypeStyle>
8+
<sld:Name>name</sld:Name>
9+
<sld:Rule>
10+
<sld:RasterSymbolizer>
11+
<Opacity>1.0</Opacity>
12+
<ColorMap extended="true">
13+
<sld:ColorMapEntry color="#2b83ba" label="&lt;= -30 °C" opacity="1.0" quantity="-30"/>
14+
<sld:ColorMapEntry color="#6bb0af" label="-20 °C" opacity="1.0" quantity="-20"/>
15+
<sld:ColorMapEntry color="#abdda4" label="-10 °C" opacity="1.0" quantity="-10"/>
16+
<sld:ColorMapEntry color="#d5eeb1" label="0 °C" opacity="1.0" quantity="0"/>
17+
<sld:ColorMapEntry color="#ffffbf" label="10 °C" opacity="1.0" quantity="10"/>
18+
<sld:ColorMapEntry color="#fed690" label="20 °C" opacity="1.0" quantity="20"/>
19+
<sld:ColorMapEntry color="#fdae61" label="30 °C" opacity="1.0" quantity="30"/>
20+
<sld:ColorMapEntry color="#ea633e" label="40 °C" opacity="1.0" quantity="40"/>
21+
<sld:ColorMapEntry color="#d7191c" label="&gt;= 50 °C" opacity="1.0" quantity="50"/>
22+
</ColorMap>
23+
</sld:RasterSymbolizer>
24+
</sld:Rule>
25+
</sld:FeatureTypeStyle>
26+
</sld:UserStyle>
27+
</sld:NamedLayer>
28+
</sld:StyledLayerDescriptor>

dataqs/hadghcnd/tasks.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
###############################################################################
5+
# Copyright Kitware Inc. and Epidemico Inc.
6+
#
7+
# Licensed under the Apache License, Version 2.0 ( the "License" );
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
###############################################################################
19+
20+
from __future__ import absolute_import
21+
22+
from celery import shared_task
23+
from dataqs.hadghcnd.hadghcnd import HadGHCNDProcessor
24+
25+
26+
@shared_task
27+
def hadghcnd_task():
28+
processor = HadGHCNDProcessor()
29+
processor.run()

0 commit comments

Comments
 (0)