Skip to content

Commit 6f46d67

Browse files
committed
added method for generating data at millisecond frequency
1 parent 9778797 commit 6f46d67

4 files changed

Lines changed: 54 additions & 3 deletions

File tree

moddata/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from moddata._utils import load_data
2+
from moddata.dg.dg import make_milisec_data
23

34
__all__ = [
4-
"load_data"
5+
"load_data",
6+
"make_milisec_data"
57
]

moddata/dg/__init__.py

Whitespace-only changes.

moddata/dg/dg.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from datetime import date, datetime, time, timedelta
2+
3+
import numpy as np
4+
import pandas as pd
5+
6+
from moddata.src.constants import MICRO_SECS_PER_DAY
7+
8+
9+
def make_milisec_data(
    inter_arrival_time_milisecs: float = 50,
    start_value: float = 5.00,
    daily_sigma: float = 0.02,
    day: date = date(2021, 1, 1),
    seed: int = 123_456
) -> pd.DataFrame:
    """
    Generate 1 day of data using GBM without drift.
    Arrivals have exponential distribution.

    The first observation is placed at midnight of ``day`` with price
    ``start_value``. Each following observation is offset from the previous
    one by an exponentially distributed gap, truncated to whole milliseconds
    plus one. Only observations whose timestamp still falls on ``day`` are
    returned.

    :param inter_arrival_time_milisecs: scale (mean) of the exponential
        inter-arrival time, in milliseconds
    :param start_value: starting value of the GBM process
    :param daily_sigma: standard deviation of the GBM for 1d period
    :param day: date to use in the simulation
    :param seed: random seed
    :return: pd.DataFrame with columns quote_time and price
    """
    # Number of milliseconds in one day, derived from timedelta so the unit
    # of the divisor is explicit and matches the unit of ``time_diff``.
    millisecs_per_day = timedelta(days=1) / timedelta(milliseconds=1)

    # Private generator: gives the same stream as np.random.seed(seed) would,
    # but leaves NumPy's global random state untouched.
    rng = np.random.RandomState(seed)

    dt: datetime = datetime.combine(day, time(0, 0, 0, 0))
    time_grid: list[datetime] = [dt]
    prices: list[float] = [start_value]

    while True:
        # Whole milliseconds; the +1 keeps every gap strictly positive, so
        # timestamps are strictly increasing.
        time_diff = int(
            rng.exponential(scale=inter_arrival_time_milisecs)
        ) + 1
        dt += timedelta(milliseconds=time_diff)
        if dt.date() != day:
            # The arrival crossed midnight: it is not part of this day.
            break
        time_grid.append(dt)
        # Scale the daily volatility by sqrt of the elapsed fraction of a day.
        prices.append(
            prices[-1] * (
                1 +
                rng.standard_normal() *
                np.sqrt(time_diff / millisecs_per_day) *
                daily_sigma
            )
        )

    return pd.DataFrame(data={"quote_time": time_grid, "price": prices})

moddata/src/constants.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,20 @@
55
"""
66

77

8-
from typing import TypeAlias, Literal
8+
from typing import TypeAlias, Literal, Final
99

1010
import pandas as pd
1111

1212
EncodingAndScalingModelType: TypeAlias = Literal[
1313
"tree_like",
14-
"other"
14+
"other",
15+
"MICRO_SECS_PER_DAY"
1516
]
1617

1718
XyDataFrames: TypeAlias = tuple[pd.DataFrame, pd.DataFrame]
1819

1920
TrainTestXyDataFrames: TypeAlias = (
2021
tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]
2122
)
23+
24+
# NOTE: despite its name, this value is the number of MILLIseconds in one day
# (24 h * 60 min * 60 s * 1_000 ms = 86_400_000), not microseconds.
MICRO_SECS_PER_DAY: Final[int] = 24 * 60 * 60 * 1_000

0 commit comments

Comments
 (0)