iverson_2000/Parse_MIDAS_daily.py at master · sgrieve/iverson_2000 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
"""
This script parses data derived from MIDAS weather station data
Created on Tue Nov 22 16:38:15 2016


HOW TO GET DATA
Search for station here
http://badc.nerc.ac.uk/search/midas_stations/

Best way is to use postcode

Get the station ID

Go to the CEDA page
http://wps-web1.ceda.ac.uk/ui/home

Go to web processes

select
Extract weather station data

Choose decadal output
choose daily rainfall
input station data

wait

@author: smudd
"""

import pandas as pd


def load_MIDAS_data(fname="new_small.csv", out_prefix="MidasNEW_"):
    """Split a MIDAS daily-rainfall CSV export into one cleaned CSV per station.

    The input file is read with pandas, header whitespace is stripped, a fixed
    set of unused columns is dropped, exact duplicate rows are removed and
    only rows with ``ob_day_cnt == 1`` are kept.  The remaining rows are
    grouped by ``src_id``; within each group only the last row for any given
    ``ob_date`` is kept, a ``days_since_1900`` column is added, and the group
    is written to ``<out_prefix><src_id>.csv``.

    Parameters
    ----------
    fname : str
        Path of the CSV file to read.  Defaults to ``"new_small.csv"``,
        the file name the script originally had hard-coded.
    out_prefix : str
        Prefix (optionally including a directory) prepended to
        ``<src_id>.csv`` for every output file.  Defaults to ``"MidasNEW_"``.

    Returns
    -------
    None
        Results are written to disk; the column headers are also printed.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from the input (raised by
        ``DataFrame.drop``).
    ValueError
        If an ``ob_date`` value does not match ``'%Y-%m-%d %H:%M'``.
    """
    # Other export files used previously:
    #   "station_data-196101010000-196112312359.csv"
    #   "station_data-201001010000-201611161701.csv"

    # Read in the data.  Dates are NOT parsed here: the earlier
    # parse_dates=True / date_parser combination only targets the index (and
    # no index column is read), and it depended on pd.datetime, which has been
    # removed from pandas.  ob_date is parsed explicitly further down.
    MIDAS_df = pd.read_csv(fname)

    # Get rid of whitespace in the header names
    MIDAS_df.columns = MIDAS_df.columns.str.strip()

    # Print the headers so the stripping can be checked by eye.  The
    # single-argument print(...) form gives the same output on Python 2 and 3.
    print("Headers are:")
    print(list(MIDAS_df.columns.values))

    # Get rid of columns that are not needed in the per-station output
    unused_columns = [
        'id',
        'id_type',
        'met_domain_name',
        'ob_end_ctime',
        'version_num',
        'ob_day_cnt_q',
        'meto_stmp_time',
        'midas_stmp_etime',
        'prcp_amt_j',
    ]
    MIDAS_df = MIDAS_df.drop(unused_columns, axis=1)

    # Parse dates
    MIDAS_df['ob_date'] = pd.to_datetime(MIDAS_df['ob_date'],
                                         format='%Y-%m-%d %H:%M')

    # Get rid of exactly duplicate lines
    MIDAS_df = MIDAS_df.drop_duplicates()

    # Make sure we are only dealing with 1 day records (not monthly totals)
    MIDAS_df = MIDAS_df[MIDAS_df['ob_day_cnt'] == 1]

    # Reference timestamp for the days_since_1900 column
    yr_1900 = pd.Timestamp('1900-01-01')

    # Group by station and write one file per station.
    for station, df_station in MIDAS_df.groupby('src_id'):
        out_name = out_prefix + str(station) + ".csv"

        # When several rows share an ob_date, keep only the last one.
        # .copy() gives an independent frame so adding a column below does
        # not touch (or warn about) a slice of the grouped data.
        new_MIDAS = df_station.drop_duplicates(subset=['ob_date'],
                                               keep="last").copy()

        # Whole days elapsed between 1900-01-01 00:00 and the observation
        new_MIDAS['days_since_1900'] = (new_MIDAS['ob_date'] - yr_1900).dt.days

        new_MIDAS.to_csv(out_name)


if __name__ == "__main__":
    # Run the parser only when this file is executed as a script, not when
    # it is imported as a module.
    # NOTE(review): the two commented-out calls below name functions that are
    # not defined in the visible part of this file -- presumably leftovers
    # from another script; confirm and delete.
    #compare_linear_to_loop()
    #test_FoS()
    load_MIDAS_data()