Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 11 additions & 17 deletions quantammsim/core_simulator/windowing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,11 @@ def raw_trades_to_trade_array(raw_trades, start_date_string, end_date_string, to
filled with zeros.
"""
# Create a DataFrame with a continuous range of Unix timestamps
full_index = (
pd.date_range(
start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"),
end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"),
freq="T",
).astype(int)
// 10**6
)
full_index = pd.date_range(
start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"),
end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"),
freq="T",
).as_unit("ms").astype(np.int64)
full_index_df = pd.DataFrame(
index=full_index, columns=["token_in", "token_out", "amount_in"], data=0
)
Expand Down Expand Up @@ -193,14 +190,11 @@ def raw_fee_like_amounts_to_fee_like_array(
Timestamps without values are filled with zeros.
"""
# Create a DataFrame with a continuous range of Unix timestamps
full_index = (
pd.date_range(
start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"),
end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"),
freq="min",
).astype(int)
// 10**6
)[:-1]
full_index = pd.date_range(
start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"),
end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"),
freq="min",
).as_unit("ms").astype(np.int64)[:-1]
full_index_df = pd.DataFrame(
index=full_index,
columns=names,
Expand All @@ -226,7 +220,7 @@ def raw_fee_like_amounts_to_fee_like_array(
raise KeyError(f"raw_inputs missing required column: {name}")

# Convert start_date_string to unix timestamp
start_unix = pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S").value // 10**6
start_unix = int(pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S").timestamp() * 1000)

# Ensure unix values are valid
valid_unix = pd.to_numeric(raw_inputs['unix'], errors='coerce')
Expand Down
2 changes: 1 addition & 1 deletion quantammsim/runners/jax_runner_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,7 +1395,7 @@ def create_daily_unix_array(start_date_str, end_date_str):
end_date = pd.to_datetime(end_date_str)
# Create a date range ending the day before the end_date
date_range = pd.date_range(start=start_date_str, end=end_date, freq="D")
daily_unix_values = date_range.view("int64") // 10**6
daily_unix_values = date_range.as_unit("ms").astype(np.int64)
return daily_unix_values.tolist()


Expand Down
4 changes: 2 additions & 2 deletions quantammsim/utils/data_processing/amalgamated_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def import_crypto_historical_data(token, root_path):
)

# Convert UTC datetime to unix timestamp (ms)
df["unix"] = df["datetime"].astype(np.int64) // 10**6
df["unix"] = df["datetime"].dt.as_unit("ms").astype(np.int64)

# Add required columns to match existing format
df["symbol"] = f"{token}/USD"
Expand All @@ -43,7 +43,7 @@ def forward_fill_ohlcv_data(df, token):
end=pd.to_datetime(df.index.max(), unit="ms"),
freq="1min",
)
full_index = full_index.astype(np.int64) // 10**6
full_index = full_index.as_unit("ms").astype(np.int64)
# Reindex with the complete minute-level index
df = df.reindex(full_index)

Expand Down
4 changes: 2 additions & 2 deletions quantammsim/utils/data_processing/cmc_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def process_cmc_timestamps(df):
) + timedelta(hours=3)

# Convert timestamp to unix milliseconds
processed_df["unix"] = processed_df["timestamp"].astype(np.int64) // 10**6
processed_df["unix"] = processed_df["timestamp"].dt.as_unit("ms").astype(np.int64)

# Set unix as index and sort
processed_df.set_index("unix", inplace=True)
Expand All @@ -79,7 +79,7 @@ def forward_fill_cmc_data(df, token):
end=pd.to_datetime(df.index.max(), unit="ms"),
freq="1min",
)
full_index = full_index.astype(np.int64) // 10**6
full_index = full_index.as_unit("ms").astype(np.int64)

# Reindex with the complete minute-level index
df = df.reindex(full_index)
Expand Down
31 changes: 12 additions & 19 deletions quantammsim/utils/data_processing/historic_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,14 +392,11 @@ def update_historic_data_old(token, root):
concat_csv["unix"] = concat_csv.index
# Reindex on minute unix
# Create a new DataFrame with unix index and minute rows between csvData min and max
new_index = (
pd.date_range(
start=pd.to_datetime(csvData.index.min(), unit="ms"),
end=pd.to_datetime(csvData.index.max(), unit="ms"),
freq="T",
).astype(int)
// 10**6
)
new_index = pd.date_range(
start=pd.to_datetime(csvData.index.min(), unit="ms"),
end=pd.to_datetime(csvData.index.max(), unit="ms"),
freq="T",
).as_unit("ms").astype(np.int64)

new_csvData = pd.DataFrame(index=new_index)
new_csvData.index.name = "unix"
Expand Down Expand Up @@ -555,21 +552,17 @@ def update_historic_data_old(token, root):
start=pd.to_datetime(hourly_data.index.min(), unit="ms"),
end=pd.to_datetime(hourly_data.index.max(), unit="ms"),
freq="H",
).astype(int)
// 10**6
).as_unit("ms").astype(np.int64)
)
hourly_data["unix"] = hourly_data.index
hourly_data["close"] = hourly_data["close"].interpolate(method="linear")

# Create a new DataFrame with minute level data
minute_index = (
pd.date_range(
start=pd.to_datetime(hourly_data.index.min(), unit="ms"),
end=pd.to_datetime(hourly_data.index.max(), unit="ms"),
freq="T",
).astype(int)
// 10**6
)
minute_index = pd.date_range(
start=pd.to_datetime(hourly_data.index.min(), unit="ms"),
end=pd.to_datetime(hourly_data.index.max(), unit="ms"),
freq="T",
).as_unit("ms").astype(np.int64)
minute_data = pd.DataFrame(index=minute_index)
minute_data.index.name = "unix"
minute_data["unix"] = minute_data.index
Expand Down Expand Up @@ -987,7 +980,7 @@ def update_historic_data(token, root):
agg_dict = {k: v for k, v in agg_dict.items() if k in concated_df_hourly.columns}

# Perform resampling
hourly_data = concated_df_hourly.resample("1H").agg(agg_dict).reset_index()
hourly_data = concated_df_hourly.resample("1h").agg(agg_dict).reset_index()

# Save hourly data
hourly_data.to_csv(hourlyPath, index=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def expand_daily_to_minute_data(daily_data, scale="ms"):
minute_data.rename(columns={"index": "datetime"}, inplace=True)

# Convert 'datetime' back to unix timestamp
minute_data["unix"] = minute_data["datetime"].astype(np.int64) // 10**6
minute_data["unix"] = minute_data["datetime"].dt.as_unit("ms").astype(np.int64)

return minute_data

Expand Down
4 changes: 2 additions & 2 deletions quantammsim/utils/data_processing/st0x_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def process_st0x_timestamps(df):
) + timedelta(hours=3)

# Convert timestamp to unix milliseconds
processed_df["unix"] = processed_df["timestamp"].astype(np.int64) // 10**6
processed_df["unix"] = processed_df["timestamp"].dt.as_unit("ms").astype(np.int64)

# Set unix as index and sort
processed_df.set_index("unix", inplace=True)
Expand All @@ -76,7 +76,7 @@ def forward_fill_st0x_data(df, token):
end=pd.to_datetime(df.index.max(), unit="ms"),
freq="1min",
)
full_index = full_index.astype(np.int64) // 10**6
full_index = full_index.as_unit("ms").astype(np.int64)

# Reindex with the complete minute-level index
df = df.reindex(full_index)
Expand Down