Skip to content

Commit d373e5e

Browse files
committed
added downloader of geomagnetic data
1 parent 05fdfd1 commit d373e5e

6 files changed

Lines changed: 50 additions & 12 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ List of currently available datasets:
1717
3. `pl_banking_stocks` - daily prices of stocks of select
1818
Polish banks for the period 2005-01-01 through 2024-12-31
1919
(data source: [stooq.com](https://stooq.com/))
20+
4. `sunspots` - daily total sunspot number data as per [SILSO](https://www.sidc.be/SILSO/datafiles)
21+
5. `geomagnetic_activity` - geomagnetic activity data (Kp, ap and Ap indices, together with sunspot number and F10.7 solar flux) as per [GFZ Centre for Geosciences](https://kp.gfz.de/en/data)
2022

2123

2224
To install this package run:

moddata/_utils.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
"btc",
1616
"pl_banking_stocks",
1717
"sunspots",
18-
"geomagnetic_indexes"
18+
"geomagnetic_activity"
1919
]
2020

2121

@@ -73,6 +73,13 @@ def _load_sunspots() -> pd.DataFrame:
7373
))
7474

7575

76+
def _load_geomagnetic_activity() -> pd.DataFrame:
    """Read the bundled geomagnetic activity dataset into a DataFrame.

    The data is loaded from the ``geomagnetic_activity.parquet`` resource
    located in the ``moddata.data`` package.

    Returns:
        The contents of the parquet resource as a ``pandas.DataFrame``.
    """
    data_package = resources.files('moddata.data')
    parquet_resource = data_package.joinpath('geomagnetic_activity.parquet')
    return pd.read_parquet(str(parquet_resource))
81+
82+
7683
def load_data(dataset: Dataset) -> pd.DataFrame | None:
7784
if dataset == "bankchurn":
7885
return _load_bankchurn()
@@ -82,6 +89,6 @@ def load_data(dataset: Dataset) -> pd.DataFrame | None:
8289
return _load_pl_banking_stocks()
8390
if dataset == "sunspots":
8491
raise _load_sunspots()
85-
if dataset == "geomagnetic_indexes":
86-
raise Exception()
92+
if dataset == "geomagnetic_activity":
93+
raise _load_geomagnetic_activity()
8794
raise ValueError(f"Encountered invalid dataset name: {dataset}")
936 KB
Binary file not shown.

moddata/extractor/download_cboe_data_extractor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pandas as pd
21

32

43
class DownloadCboeDataExtractor:
Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,44 @@
1+
import logging
12
from typing import Final
23

4+
import numpy as np
35
import pandas as pd
6+
import requests
7+
8+
logger = logging.getLogger(__name__)
49

510

611
class DownloadGeomagneticIndexExtractor:
    """Download the GFZ geomagnetic index text file and parse it.

    The file at ``_DATA_URL`` is a whitespace-separated table. Every line
    that is not a comment and has exactly one field per entry of
    ``_COLUMNS_TO_TYPES`` is treated as a data row.
    """

    _DATA_URL: Final[str] = \
        "https://kp.gfz.de/app/files/Kp_ap_Ap_SN_F107_since_1932.txt"
    # Lines starting with this prefix are treated as header/comment lines
    # and skipped, so parsing does not depend on a fixed header length.
    _COMMENT_PREFIX: Final[str] = "#"
    # Seconds to wait for the server, so a stalled connection cannot hang
    # the caller forever.
    _REQUEST_TIMEOUT: Final[float] = 60.0
    # Column names (in file order) mapped to the dtype each one is cast to.
    _COLUMNS_TO_TYPES: Final[dict[str, type]] = {
        "year": int, "month": int, "day": int, "days": int, "days_m": float,
        "Bsr": int, "dB": int,
        "Kp1": float, "Kp2": float, "Kp3": float, "Kp4": float,
        "Kp5": float, "Kp6": float, "Kp7": float, "Kp8": float,
        "ap1": int, "ap2": int, "ap3": int, "ap4": int, "ap5": int,
        "ap6": int, "ap7": int, "ap8": int,
        "Ap": int, "SN": int,
        "F10.7obs": float, "F10.7adj": float, "D": int
    }

    def extract(self) -> pd.DataFrame:
        """Download the data file and return it as a typed DataFrame.

        Returns:
            A DataFrame with one row per parsed data line, the columns
            listed in ``_COLUMNS_TO_TYPES`` and a fresh 0..n-1 index.

        Raises:
            requests.RequestException: if the download fails, times out
                or the server answers with an HTTP error status.
            ValueError: if the downloaded text contains no data rows or
                a field cannot be cast to its configured type.
        """
        response = requests.get(
            self._DATA_URL, timeout=self._REQUEST_TIMEOUT
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()
        # Use the decoded text; ``str(response.content)`` would yield the
        # repr of a bytes object (``b'...'`` with escaped newlines).
        table = self._parse_table(response.text)
        logger.info("parsed %d rows of geomagnetic data", len(table))
        return table

    @classmethod
    def _parse_table(cls, text: str) -> pd.DataFrame:
        """Parse the whitespace-separated table in *text*.

        Args:
            text: Full content of the data file.

        Returns:
            A DataFrame holding every data row, cast column by column to
            the types in ``_COLUMNS_TO_TYPES``.

        Raises:
            ValueError: if no data row is found or a field cannot be cast.
        """
        columns = list(cls._COLUMNS_TO_TYPES)
        rows: list[list[str]] = []
        for line in text.splitlines():
            if line.lstrip().startswith(cls._COMMENT_PREFIX):
                continue
            fields = line.split()
            # Blank or malformed lines do not have the expected field count.
            if len(fields) == len(columns):
                rows.append(fields)
        if not rows:
            raise ValueError(
                f"No data rows found in geomagnetic data from {cls._DATA_URL}"
            )
        # Build the frame once (object dtype keeps the raw strings) and cast
        # whole columns, instead of creating one DataFrame per line.
        table = pd.DataFrame(rows, columns=columns, dtype=object)
        return table.astype(cls._COLUMNS_TO_TYPES)

moddata/extractor/download_sunspots_extractor.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99

1010
class DownloadSunspotsExtractor:
1111

12-
_DAILY_TOTAL_SUNSPOT_NUMBER_URL: Final[str] = "https://www.sidc.be/SILSO/INFO/sndtotcsv.php"
12+
_DAILY_TOTAL_SUNSPOT_NUMBER_URL: Final[str] = \
13+
"https://www.sidc.be/SILSO/INFO/sndtotcsv.php"
1314

1415
def extract(self) -> pd.DataFrame:
1516
data = pd.read_csv(
@@ -28,7 +29,7 @@ def extract(self) -> pd.DataFrame:
2829
data["day"] = (
2930
data["year"].astype(str) + "-" +
3031
data["month"].apply(lambda x: f"{x:02}") + "-" +
31-
data["day"].apply( lambda x: f"{x:02}")
32+
data["day"].apply(lambda x: f"{x:02}")
3233
)
3334
data = data[["day", "daily_sunspots_number"]]
3435
data["daily_sunspots_number"] = np.where(

0 commit comments

Comments
 (0)