|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +############################################################################### |
| 5 | +# Copyright Kitware Inc. and Epidemico Inc. |
| 6 | +# |
| 7 | +# Licensed under the Apache License, Version 2.0 ( the "License" ); |
| 8 | +# you may not use this file except in compliance with the License. |
| 9 | +# You may obtain a copy of the License at |
| 10 | +# |
| 11 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 12 | +# |
| 13 | +# Unless required by applicable law or agreed to in writing, software |
| 14 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 15 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 16 | +# See the License for the specific language governing permissions and |
| 17 | +# limitations under the License. |
| 18 | +############################################################################### |
| 19 | + |
| 20 | +from __future__ import absolute_import |
| 21 | +import logging |
| 22 | +import os |
| 23 | +import re |
| 24 | +import shutil |
| 25 | +from datetime import date |
| 26 | +from time import sleep |
| 27 | +import gdal |
| 28 | +from dateutil.relativedelta import relativedelta |
| 29 | +from dataqs.processor_base import GeoDataMosaicProcessor, GS_DATA_DIR, \ |
| 30 | + GS_TMP_DIR, RSYNC_WAIT_TIME |
| 31 | +from dataqs.helpers import gdal_translate, style_exists, create_band_vrt, untar |
| 32 | + |
| 33 | +logger = logging.getLogger("dataqs.processors") |
| 34 | +script_dir = os.path.dirname(os.path.realpath(__file__)) |
| 35 | + |
| 36 | + |
class HadGHCNDProcessor(GeoDataMosaicProcessor):
    """
    Hadley Centre gridded daily temperature dataset based upon near-surface
    maximum (TX) and minimum (TN) temperature observations.
    It is designed primarily for the analysis of climate extremes and also for
    climate model evaluation. It spans the years 1950 to present and is
    available on a 2.5° latitude by 3.75° longitude grid. This dataset has
    been developed in collaboration with the United States National Centers for
    Environmental Information (NCEI), formerly the National Climatic Data Center
    (NCDC).
    More info at http://www.metoffice.gov.uk/hadobs/hadghcnd/index.html
    """
    # Prefix used for layer/store names and the scratch directory.
    prefix = "HadGHCND"
    tmp_dir = os.path.join(GS_TMP_DIR, prefix)
    base_url = "http://www.metoffice.gov.uk/hadobs/hadghcnd/data/"

    # Downloadable archive name -> GeoNode title/layer-name templates.
    # '{measure}' is filled with 'tmin'/'tmax', '{prefix}' with self.prefix.
    layers = {
        'HadGHCND_TXTN_anoms_1950-2014_15052015.nc.tgz': {
            'title': 'HadGHCND Temperature Anomalies - {measure}, 1950-2014',
            'name': '{prefix}_anomalies_{measure}'
        },
        'HadGHCND_TXTN_acts_1950-2014_15102015.nc.tgz': {
            'title': 'HadGHCND Actual Temperatures - {measure}, 1950-2014',
            'name': '{prefix}_temperatures_{measure}'
        }

    }
    abstract = (
        u"HadGHCND is a gridded daily temperature dataset based upon near-"
        u"surface maximum (TX) and minimum (TN) temperature observations. It is"
        u" designed primarily for the analysis of climate extremes and also for"
        u" climate model evaluation. It spans the years 1950 to present and is "
        u"available on a 2.5° latitude by 3.75° longitude grid. This dataset "
        u"has been developed in collaboration with the United States National "
        u"Centers for Environmental Information (NCEI), formerly the National "
        u"Climatic Data Center (NCDC).\n\nHadGHCND has been created using daily"
        u" station observations in NCEI's GHCN (Global Historical Climatology "
        u"Network)-Daily database. This consists of over 27,000 stations with "
        u"temperature observations, though many of these have quite short "
        u"records, or gaps in the record. Quality control has been carried out "
        u"to indicate potentially spurious values. We have filtered down these "
        u"stations to obtain those for which we can adequately calculate a 1961"
        u"-90 daily climatology. The dataset represents anomalies from the 1961"
        u"-1990 climatology.\n\nAn angular-distance weighting approach was used"
        u" to interpolate the station data onto the required grid. We chose to "
        u"grid the station anomalies to overcome some of the issues associated "
        u"with elevation dependence.\n\nThe data are available as gridded "
        u"anomalies, relative to the 1961-90 base period, and also as gridded "
        u"actual temperatures. The actual temperatures were created by gridding"
        u" the daily normals and adding these to the relevant daily anomaly.\n"
        u"\nSource: http://www.metoffice.gov.uk/hadobs/hadghcnd/index.html\n\n"
        u"Raw data file: {}")

    def extract_band(self, ncfile, band, outname, projection=None):
        """
        Extract specified band from NetCDF file and convert to GeoTIFF,
        using a VRT file to swap E-W axis (the two SimpleSource entries
        below remap the grid's longitude halves so the output runs
        west-to-east from -180).
        :param ncfile: NetCDF input filename, formatted for use in GDAL
        :param band: Band number to process
        :param outname: Output GeoTIFF filename
        :param projection: Projection to use
        :return: Full pathname of output GeoTIFF
        """
        out_path = os.path.join(self.tmp_dir, outname)
        temp_vrt = out_path + '.vrt'
        try:
            source_xml = """
            <root>
            <SimpleSource>
              <SourceFilename relativeToVRT="1">{fname}</SourceFilename>
              <SourceBand>{band}</SourceBand>
              <SrcRect xOff="48" yOff="0" xSize="96" ySize="73"/>
              <DstRect xOff="0" yOff="0" xSize="96" ySize="73"/>
            </SimpleSource>
            <SimpleSource>
              <SourceFilename relativeToVRT="1">{fname}</SourceFilename>
              <SourceBand>{band}</SourceBand>
              <SrcRect xOff="0" yOff="0" xSize="96" ySize="73"/>
              <DstRect xOff="48" yOff="0" xSize="96" ySize="73"/>
            </SimpleSource>
            </root>
            """.format(fname=ncfile, band=band)
            # Affine geotransform for the swapped 96x73 grid:
            # origin (-181.875, 91.25), pixel size 3.75 x -2.5 degrees.
            geotransform = ','.join(['-1.818750000000000000e+02',
                                     '3.7500000000000000e+00',
                                     '0.0000000000000000e+00',
                                     '9.1250000000000000e+01',
                                     '0.0000000000000000e+00',
                                     '-2.5000000000000000e+00'])
            create_band_vrt(ncfile, temp_vrt, [band], source_xml,
                            projection=projection, geotransform=geotransform)
            gdal_translate(temp_vrt,
                           out_path,
                           projection='EPSG:4326',
                           options=['TILED=YES', 'COMPRESS=LZW'])
        finally:
            # Always remove the scratch VRT, even if translation fails.
            if os.path.exists(temp_vrt):
                os.remove(temp_vrt)

        return out_path

    def get_date(self, days):
        """
        Calculate the date from the NetCDF band time value in days since 0/0/00
        :param days: Days since 00/00/00
        :return: Python date object
        """
        # date(1, 1, 1) is the earliest representable Python date; the
        # "-2 days, -1 year" offsets calibrate the proleptic Gregorian
        # calendar against the dataset's "days since 0000-00-00" epoch.
        # NOTE(review): offsets appear empirically derived -- confirm
        # against known band dates before changing.
        start = date(1, 1, 1)
        return start + relativedelta(days=days - 2) + relativedelta(years=-1)

    def run(self):
        """
        Retrieve and process the HadGHCND climate data: download each
        archive, unpack its NetCDF files, convert up to 10 new daily bands
        per measure (tmin/tmax) into GeoTIFFs, push them into the GeoServer
        image mosaic, and update GeoNode styles/metadata.
        """
        for key in self.layers:
            # Plain concatenation: os.path.join would produce a
            # backslash-separated URL on Windows.
            src = self.base_url + key
            tarfile = self.download(src)
            cdf_files = untar(os.path.join(self.tmp_dir, tarfile),
                              self.tmp_dir)
            for cdf in cdf_files:
                for measure in ('tmin', 'tmax'):
                    ncds_gdal_name = 'NETCDF:{}:{}'.format(cdf, measure)
                    ncds = gdal.Open(ncds_gdal_name)
                    bands = ncds.RasterCount
                    layer_name = self.layers[key]['name'].format(
                        prefix=self.prefix, measure=measure
                    )
                    # Existing mosaic granules; skip dates already loaded.
                    img_list = self.get_mosaic_filenames(layer_name)
                    files = []
                    # Limit each run to at most 10 bands (days).
                    for band in range(1, min(11, bands + 1)):
                        days = int(ncds.GetRasterBand(band)
                                   .GetMetadata()['NETCDF_DIM_time'])
                        # Raw string: '\-'/'\.' in a plain literal are
                        # invalid escapes (SyntaxWarning on Python 3.12+).
                        band_date = re.sub(r'[\-\.]+', '',
                                           self.get_date(days).isoformat())
                        img_name = '{}_{}T000000000Z.tif'.format(layer_name,
                                                                 band_date)
                        if img_name not in img_list:
                            band_tif = self.extract_band(ncds_gdal_name,
                                                         band,
                                                         img_name,
                                                         projection='WGS84')
                            dst_file = self.data_dir.format(gsd=GS_DATA_DIR,
                                                            ws=self.workspace,
                                                            layer=layer_name,
                                                            file=img_name)
                            dst_dir = os.path.dirname(dst_file)
                            if not os.path.exists(dst_dir):
                                os.makedirs(dst_dir)
                            shutil.move(band_tif, dst_file)
                            files.append(dst_file)
                    # Dereference to let GDAL close the dataset before the
                    # rsync wait and publishing steps.
                    ncds = None
                    sleep(RSYNC_WAIT_TIME * 2)
                    for img_file in files:
                        self.post_geoserver(img_file, layer_name, sleeptime=0)
                    # SLD filename is derived from the first two name parts,
                    # e.g. 'HadGHCND_anomalies'.
                    style = '_'.join(layer_name.split('_')[0:2])
                    if not style_exists(layer_name):
                        with open(os.path.join(script_dir,
                                               'resources/{}.sld'.format(
                                                   style))) as sld:
                            self.set_default_style(layer_name,
                                                   layer_name,
                                                   sld.read())
                    title = self.layers[key]['title'].format(measure=measure)
                    self.update_geonode(layer_name,
                                        title=title,
                                        description=self.abstract.format(src),
                                        store=layer_name,
                                        bounds=('-180.0', '180.0',
                                                '-90.0', '90.0',
                                                'EPSG:4326'))
                    self.truncate_gs_cache(layer_name)
        self.cleanup()
| 205 | + |
| 206 | + |
if __name__ == '__main__':
    # Script entry point: build the processor and run a full harvest cycle.
    HadGHCNDProcessor().run()
0 commit comments