Skip to content

Commit 1135845

Browse files
committed
Patch time-series handling in the data pipeline for environments where the time-series dependencies are not installed. Add minimum feature retention (at least one feature is always kept).
1 parent e78ed3e commit 1135845

1 file changed

Lines changed: 36 additions & 8 deletions

File tree

ml_grid/pipeline/data.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,7 @@
1919
from ml_grid.pipeline.data_train_test_split import *
2020
from ml_grid.pipeline.logs_project_folder import log_folder
2121
from ml_grid.pipeline.model_class_list import get_model_class_list
22-
from ml_grid.pipeline.model_class_list_ts import get_model_class_list_ts
2322
from ml_grid.util.global_params import global_parameters
24-
from ml_grid.util.time_series_helper import (
25-
convert_Xy_to_time_series,
26-
max_client_idcode_sequence_length,
27-
)
2823

2924
ConvergenceWarning("ignore")
3025

@@ -270,20 +265,37 @@ def __init__(
270265
# self.X = self.X.rename(columns = lambda x:re.sub('[^A-Za-z0-9]+', '', x))
271266

272267
if self.time_series_mode:
268+
try:
269+
from ml_grid.util.time_series_helper import (
270+
convert_Xy_to_time_series,
271+
max_client_idcode_sequence_length,
272+
)
273+
except (ImportError, ModuleNotFoundError):
274+
print("\n--- WARNING: Time-series libraries not found. ---")
275+
print(
276+
"To run in time-series mode, please install the required dependencies:"
277+
)
278+
print(
279+
"1. Activate the correct virtual environment: source ml_grid_ts_env/bin/activate"
280+
)
281+
print("2. If not installed, run: ./install_ts.sh (or install_ts.bat on Windows)")
282+
print("-----------------------------------------------------\n")
283+
raise
284+
273285
if self.verbose >= 1:
274286
print("pre func")
275287
display(self.X)
276288

277289
max_seq_length = max_client_idcode_sequence_length(self.df)
278290

279-
if self.time_series_mode:
280291
if self.verbose >= 1:
281292
print("time_series_mode", "convert_df_to_time_series")
282293
print(self.X.shape)
283294

284295
self.X, self.y = convert_Xy_to_time_series(self.X, self.y, max_seq_length)
285296
if self.verbose >= 1:
286297
print(self.X.shape)
298+
287299
(
288300
self.X_train,
289301
self.X_test,
@@ -309,8 +321,9 @@ def __init__(
309321
(target_n_features / 100) * self.X_train.shape[1]
310322
)
311323

312-
if target_n_features_eval < self.X_train.shape[1]:
313-
target_n_features_eval = self.X_train.shape[1]
324+
# Ensure at least one feature is selected. The previous logic here
325+
# was incorrect and disabled feature selection entirely.
326+
target_n_features_eval = max(1, target_n_features_eval)
314327

315328
print(
316329
f"Pre target_n_features {target_n_features}% reduction {target_n_features_eval}/{self.X_train.shape[1]}"
@@ -352,6 +365,21 @@ def __init__(
352365
if time_series_mode:
353366
if self.verbose >= 2:
354367
print("data>>", "get_model_class_list_ts")
368+
try:
369+
from ml_grid.pipeline.model_class_list_ts import (
370+
get_model_class_list_ts,
371+
)
372+
except (ImportError, ModuleNotFoundError):
373+
print("\n--- WARNING: Time-series libraries not found. ---")
374+
print(
375+
"To run in time-series mode, please install the required dependencies:"
376+
)
377+
print(
378+
"1. Activate the correct virtual environment: source ml_grid_ts_env/bin/activate"
379+
)
380+
print("2. If not installed, run: ./install_ts.sh (or install_ts.bat on Windows)")
381+
print("-----------------------------------------------------\n")
382+
raise
355383
self.model_class_list = get_model_class_list_ts(self)
356384

357385
else:

0 commit comments

Comments
 (0)