1+ import logging
12from typing import Final
23
4+ import numpy as np
35import pandas as pd
6+ import requests
7+
8+ logger = logging .getLogger (__name__ )
49
510
class DownloadGeomagneticIndexExtractor:
    """Download the index text file at ``_DATA_URL`` and parse it into a table.

    The file is fetched over HTTP, the leading ``_FIRST_DATA_LINE`` lines are
    skipped, and every remaining line that splits into exactly as many
    whitespace-separated fields as ``_COLUMNS_TO_TYPES`` has entries becomes
    one row of the resulting ``DataFrame``. Lines with any other field count
    are ignored.
    """

    _DATA_URL: Final[str] = (
        "https://kp.gfz.de/app/files/Kp_ap_Ap_SN_F107_since_1932.txt"
    )
    # Number of leading lines skipped before parsing starts.
    # NOTE(review): confirm this matches the header length of the published
    # file; if the header is shorter, the first data rows are dropped silently.
    _FIRST_DATA_LINE: Final[int] = 41
    # Upper bound (seconds) for connecting/reading so a stalled server cannot
    # block the caller indefinitely.
    _REQUEST_TIMEOUT_SECONDS: Final[float] = 60.0
    # Column names in file order, mapped to the dtype each column is cast to.
    _COLUMNS_TO_TYPES: Final[dict[str, type]] = {
        "year": int, "month": int, "day": int, "days": int, "days_m": float,
        "Bsr": int, "dB": int,
        "Kp1": float, "Kp2": float, "Kp3": float, "Kp4": float,
        "Kp5": float, "Kp6": float, "Kp7": float, "Kp8": float,
        "ap1": int, "ap2": int, "ap3": int, "ap4": int, "ap5": int,
        "ap6": int, "ap7": int, "ap8": int,
        "Ap": int, "SN": int,
        "F10.7obs": float, "F10.7adj": float, "D": int,
    }

    def extract(self) -> pd.DataFrame:
        """Fetch the remote file and return its data rows as a typed table.

        Returns:
            A ``DataFrame`` with the columns of ``_COLUMNS_TO_TYPES`` (cast to
            the mapped types) and a fresh 0-based index.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
            ValueError: If the response contains no parsable data row.
        """
        response = requests.get(
            self._DATA_URL, timeout=self._REQUEST_TIMEOUT_SECONDS
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        # Decode the payload properly; str(bytes) would yield the repr
        # ("b'...'") with escaped newlines rather than the actual text.
        frame = self._parse_lines(response.text.splitlines())
        logger.info("parsed %d rows from %s", len(frame), self._DATA_URL)
        return frame

    @classmethod
    def _parse_lines(cls, lines: list[str]) -> pd.DataFrame:
        """Turn the raw text lines of the file into a typed ``DataFrame``.

        Args:
            lines: All lines of the file, header included, without line
                terminators.

        Returns:
            One row per line after the first ``_FIRST_DATA_LINE`` lines that
            has exactly ``len(_COLUMNS_TO_TYPES)`` whitespace-separated fields.

        Raises:
            ValueError: If no line qualifies as a data row, or a field cannot
                be converted to its column type.
        """
        expected_fields = len(cls._COLUMNS_TO_TYPES)
        split_lines = (line.split() for line in lines[cls._FIRST_DATA_LINE:])
        rows = [fields for fields in split_lines if len(fields) == expected_fields]
        if not rows:
            raise ValueError(
                f"no data rows with {expected_fields} columns found in input"
            )
        # Build the table once and cast once; creating and concatenating a
        # one-row DataFrame per line is orders of magnitude slower.
        frame = pd.DataFrame(rows, columns=list(cls._COLUMNS_TO_TYPES))
        return frame.astype(cls._COLUMNS_TO_TYPES)
0 commit comments