diff --git a/quantammsim/core_simulator/windowing_utils.py b/quantammsim/core_simulator/windowing_utils.py index 052a77e..86735d0 100644 --- a/quantammsim/core_simulator/windowing_utils.py +++ b/quantammsim/core_simulator/windowing_utils.py @@ -128,14 +128,11 @@ def raw_trades_to_trade_array(raw_trades, start_date_string, end_date_string, to filled with zeros. """ # Create a DataFrame with a continuous range of Unix timestamps - full_index = ( - pd.date_range( - start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"), - end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"), - freq="T", - ).astype(int) - // 10**6 - ) + full_index = pd.date_range( + start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"), + end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"), + freq="T", + ).as_unit("ms").astype(np.int64) full_index_df = pd.DataFrame( index=full_index, columns=["token_in", "token_out", "amount_in"], data=0 ) @@ -193,14 +190,11 @@ def raw_fee_like_amounts_to_fee_like_array( Timestamps without values are filled with zeros. """ # Create a DataFrame with a continuous range of Unix timestamps - full_index = ( - pd.date_range( - start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"), - end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"), - freq="min", - ).astype(int) - // 10**6 - )[:-1] + full_index = pd.date_range( + start=pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S"), + end=pd.to_datetime(end_date_string, format="%Y-%m-%d %H:%M:%S"), + freq="min", + ).as_unit("ms").astype(np.int64)[:-1] full_index_df = pd.DataFrame( index=full_index, columns=names, @@ -226,7 +220,7 @@ def raw_fee_like_amounts_to_fee_like_array( raise KeyError(f"raw_inputs missing required column: {name}") # Convert start_date_string to unix timestamp - start_unix = pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S").value // 10**6 + start_unix = int(pd.to_datetime(start_date_string, format="%Y-%m-%d %H:%M:%S").timestamp() * 1000) # Ensure unix values are valid valid_unix = pd.to_numeric(raw_inputs['unix'], errors='coerce') diff --git a/quantammsim/runners/jax_runner_utils.py b/quantammsim/runners/jax_runner_utils.py index 77bcadb..8f6214b 100644 --- a/quantammsim/runners/jax_runner_utils.py +++ b/quantammsim/runners/jax_runner_utils.py @@ -1395,7 +1395,7 @@ def create_daily_unix_array(start_date_str, end_date_str): end_date = pd.to_datetime(end_date_str) # Create a date range ending the day before the end_date date_range = pd.date_range(start=start_date_str, end=end_date, freq="D") - daily_unix_values = date_range.view("int64") // 10**6 + daily_unix_values = date_range.as_unit("ms").astype(np.int64) return daily_unix_values.tolist() diff --git a/quantammsim/utils/data_processing/amalgamated_data_utils.py b/quantammsim/utils/data_processing/amalgamated_data_utils.py index 61cd65f..80ee4c3 100644 --- a/quantammsim/utils/data_processing/amalgamated_data_utils.py +++ b/quantammsim/utils/data_processing/amalgamated_data_utils.py @@ -21,7 +21,7 @@ def import_crypto_historical_data(token, root_path): ) # Convert UTC datetime to unix timestamp (ms) - df["unix"] = df["datetime"].astype(np.int64) // 10**6 + df["unix"] = df["datetime"].dt.as_unit("ms").astype(np.int64) # Add required columns to match existing format df["symbol"] = f"{token}/USD" @@ -43,7 +43,7 @@ def forward_fill_ohlcv_data(df, token): end=pd.to_datetime(df.index.max(), unit="ms"), freq="1min", ) - full_index = full_index.astype(np.int64) // 10**6 + full_index = full_index.as_unit("ms").astype(np.int64) # Reindex with the complete minute-level index df = df.reindex(full_index) diff --git a/quantammsim/utils/data_processing/cmc_data_utils.py b/quantammsim/utils/data_processing/cmc_data_utils.py index 578ffc7..7e89f0d 100644 --- a/quantammsim/utils/data_processing/cmc_data_utils.py +++ b/quantammsim/utils/data_processing/cmc_data_utils.py @@ -52,7 +52,7 @@ def process_cmc_timestamps(df): ) + timedelta(hours=3) # Convert timestamp to unix milliseconds - processed_df["unix"] = processed_df["timestamp"].astype(np.int64) // 10**6 + processed_df["unix"] = processed_df["timestamp"].dt.as_unit("ms").astype(np.int64) # Set unix as index and sort processed_df.set_index("unix", inplace=True) @@ -79,7 +79,7 @@ def forward_fill_cmc_data(df, token): end=pd.to_datetime(df.index.max(), unit="ms"), freq="1min", ) - full_index = full_index.astype(np.int64) // 10**6 + full_index = full_index.as_unit("ms").astype(np.int64) # Reindex with the complete minute-level index df = df.reindex(full_index) diff --git a/quantammsim/utils/data_processing/historic_data_utils.py b/quantammsim/utils/data_processing/historic_data_utils.py index 31fb76c..1954405 100644 --- a/quantammsim/utils/data_processing/historic_data_utils.py +++ b/quantammsim/utils/data_processing/historic_data_utils.py @@ -392,14 +392,11 @@ def update_historic_data_old(token, root): concat_csv["unix"] = concat_csv.index # Reindex on minute unix # Create a new DataFrame with unix index and minute rows between csvData min and max - new_index = ( - pd.date_range( - start=pd.to_datetime(csvData.index.min(), unit="ms"), - end=pd.to_datetime(csvData.index.max(), unit="ms"), - freq="T", - ).astype(int) - // 10**6 - ) + new_index = pd.date_range( + start=pd.to_datetime(csvData.index.min(), unit="ms"), + end=pd.to_datetime(csvData.index.max(), unit="ms"), + freq="T", + ).as_unit("ms").astype(np.int64) new_csvData = pd.DataFrame(index=new_index) new_csvData.index.name = "unix" @@ -555,21 +552,17 @@ def update_historic_data_old(token, root): start=pd.to_datetime(hourly_data.index.min(), unit="ms"), end=pd.to_datetime(hourly_data.index.max(), unit="ms"), freq="H", - ).astype(int) - // 10**6 + ).as_unit("ms").astype(np.int64) ) hourly_data["unix"] = hourly_data.index hourly_data["close"] = hourly_data["close"].interpolate(method="linear") # Create a new DataFrame with minute level data - minute_index = ( - pd.date_range( - start=pd.to_datetime(hourly_data.index.min(), unit="ms"), - end=pd.to_datetime(hourly_data.index.max(), unit="ms"), - freq="T", - ).astype(int) - // 10**6 - ) + minute_index = pd.date_range( + start=pd.to_datetime(hourly_data.index.min(), unit="ms"), + end=pd.to_datetime(hourly_data.index.max(), unit="ms"), + freq="T", + ).as_unit("ms").astype(np.int64) minute_data = pd.DataFrame(index=minute_index) minute_data.index.name = "unix" minute_data["unix"] = minute_data.index @@ -987,7 +980,7 @@ def update_historic_data(token, root): agg_dict = {k: v for k, v in agg_dict.items() if k in concated_df_hourly.columns} # Perform resampling - hourly_data = concated_df_hourly.resample("1H").agg(agg_dict).reset_index() + hourly_data = concated_df_hourly.resample("1h").agg(agg_dict).reset_index() # Save hourly data hourly_data.to_csv(hourlyPath, index=False) diff --git a/quantammsim/utils/data_processing/minute_daily_conversion_utils.py b/quantammsim/utils/data_processing/minute_daily_conversion_utils.py index b86c352..36b747b 100644 --- a/quantammsim/utils/data_processing/minute_daily_conversion_utils.py +++ b/quantammsim/utils/data_processing/minute_daily_conversion_utils.py @@ -33,7 +33,7 @@ def expand_daily_to_minute_data(daily_data, scale="ms"): minute_data.rename(columns={"index": "datetime"}, inplace=True) # Convert 'datetime' back to unix timestamp - minute_data["unix"] = minute_data["datetime"].astype(np.int64) // 10**6 + minute_data["unix"] = minute_data["datetime"].dt.as_unit("ms").astype(np.int64) return minute_data diff --git a/quantammsim/utils/data_processing/st0x_data_utils.py b/quantammsim/utils/data_processing/st0x_data_utils.py index b7b1f0d..730173c 100644 --- a/quantammsim/utils/data_processing/st0x_data_utils.py +++ b/quantammsim/utils/data_processing/st0x_data_utils.py @@ -49,7 +49,7 @@ def process_st0x_timestamps(df): ) + timedelta(hours=3) # Convert timestamp to unix milliseconds - processed_df["unix"] = processed_df["timestamp"].astype(np.int64) // 10**6 + processed_df["unix"] = processed_df["timestamp"].dt.as_unit("ms").astype(np.int64) # Set unix as index and sort processed_df.set_index("unix", inplace=True) @@ -76,7 +76,7 @@ def forward_fill_st0x_data(df, token): end=pd.to_datetime(df.index.max(), unit="ms"), freq="1min", ) - full_index = full_index.astype(np.int64) // 10**6 + full_index = full_index.as_unit("ms").astype(np.int64) # Reindex with the complete minute-level index df = df.reindex(full_index)