# Source: PyPortfolioOpt/pypfopt/data/data_loader.py at commit
# 6f254b6eb99a4c4e181b38ba489c2be50a0c2e9d (PyPortfolio/PyPortfolioOpt on GitHub).

from importlib import resources

import pandas as pd


def _load_raw_data(filename: str, **read_csv_kwargs):
    """
    Read a data file bundled with this package using ``pandas.read_csv``.

    Parameters
    ----------
    filename : str
        Name of the file to read. It is resolved relative to this module's
        package through ``importlib.resources``.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents (for ordinary ``read_csv`` arguments).
    """
    resource = resources.files(__package__).joinpath(filename)
    # Pin the encoding: without it the text is decoded with the platform's
    # locale default, which differs between operating systems.
    with resource.open("r", encoding="utf-8") as f:
        return pd.read_csv(f, **read_csv_kwargs)


def load_stockdata(tickers: list = None, start: str = None, end: str = None):
    """
    Load example stock price data.

    This function loads a synthetic stock price dataset included with the
    package. The data can optionally be filtered by ticker symbols and
    date range.

    Parameters
    ----------
    tickers : list of str, optional
        Ticker symbols to include. Any iterable of strings is accepted
        (list, tuple, pandas Index, ...), and a single string is treated as
        one ticker. If ``None``, all available tickers are returned.
    start : str, optional
        Start date for filtering the dataset (inclusive). Should be
        interpretable by ``pandas.to_datetime``.
    end : str, optional
        End date for filtering the dataset (inclusive). Should be
        interpretable by ``pandas.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        DataFrame of stock prices indexed by date. Columns correspond to
        ticker symbols, in the order requested, and values represent price
        levels.

    Raises
    ------
    KeyError
        Raised by pandas if a requested ticker is not a column of the
        dataset.

    Notes
    -----
    The dataset is bundled with the package and does not rely on external
    data sources. It is intended for examples and tutorials.
    """
    df = _load_raw_data("stock_prices.csv", parse_dates=["date"])

    if start is not None:
        df = df[df["date"] >= pd.to_datetime(start)]
    if end is not None:
        df = df[df["date"] <= pd.to_datetime(end)]

    if tickers is not None:
        # A bare string would otherwise be iterated character by character.
        if isinstance(tickers, str):
            tickers = [tickers]
        # Unpack instead of concatenating with "+", which only works when
        # ``tickers`` is a real list (tuples and arrays would fail).
        df = df[["date", *tickers]]

    return df.set_index("date")


def load_marketcaps(tickers: list = None):
    """
    Load bundled example market capitalisation data.

    This function loads synthetic market capitalisation values for the
    example assets included in the package.

    Parameters
    ----------
    tickers : list of str, optional
        Ticker symbols to return. Any iterable of strings is accepted,
        including one-shot iterators, and a single string is treated as one
        ticker. If ``None``, market caps for all available tickers are
        returned.

    Returns
    -------
    dict
        Dictionary mapping ticker symbols to market capitalisation values.
        Entries follow the row order of the bundled file, not the order of
        ``tickers``.

    Raises
    ------
    ValueError
        If any requested ticker is not present in the bundled data.

    Notes
    -----
    The values are synthetic and provided solely for use in examples
    demonstrating portfolio optimisation methods.
    """
    df = _load_raw_data("market_caps.csv")

    if tickers is not None:
        if isinstance(tickers, str):
            # A bare string would otherwise be split into single characters.
            tickers = [tickers]
        else:
            # Materialise once: the symbols are walked twice below, which
            # would exhaust a generator before the filtering step.
            tickers = list(tickers)

        available = set(df["ticker"])
        invalid = set(tickers) - available
        if invalid:
            raise ValueError(f"Invalid tickers: {invalid}")

        df = df[df["ticker"].isin(tickers)]

    return dict(zip(df["ticker"], df["market_cap"]))


def available_tickers():
    """
    List the ticker symbols of the bundled example dataset.

    Returns
    -------
    list of str
        Ticker symbols in alphabetical order. A new list is built on every
        call, so callers may modify the result freely.

    Notes
    -----
    The symbols are hard-coded here rather than read from the data file;
    they are meant to match the columns of the example stock price dataset
    returned by :func:`load_stockdata`.
    """
    symbols = (
        "AAPL", "ACN", "AMD", "AMZN",
        "BAC", "BLK",
        "COST", "CVS",
        "DIS", "DPZ",
        "F",
        "GILD",
        "INTU",
        "JD", "JPM",
        "KO",
        "LUV",
        "MA", "MCD", "MSFT",
        "NAT", "NVDA",
        "PBI", "PFE",
        "SBUX", "SPY",
        "TGT", "TM", "TSLA",
        "UL", "UNH",
        "WMT",
        "XOM",
    )
    return list(symbols)