|
1 | 1 | import time |
2 | 2 | import logging |
| 3 | +import multiprocessing |
3 | 4 | import warnings |
4 | 5 | from typing import Any, Dict, List, Optional, Union |
5 | 6 |
|
|
46 | 47 | from ml_grid.util.bayes_utils import is_skopt_space |
47 | 48 | from skopt.space import Categorical |
48 | 49 |
|
| 50 | +# Global flag to ensure TensorFlow/GPU setup runs only once per process |
| 51 | +_TF_INITIALIZED = False |
| 52 | + |
49 | 53 |
|
50 | 54 | class grid_search_crossvalidate: |
51 | 55 |
|
@@ -102,26 +106,42 @@ def __init__( |
102 | 106 |
|
103 | 107 | self.sub_sample_parameter_val = sub_sample_parameter_val |
104 | 108 |
|
105 | | - grid_n_jobs = self.global_params.grid_n_jobs |
| 109 | + # --- OPTIMIZATION: Detect Nested Parallelism --- |
| 110 | + # If running inside a worker process (daemon), force n_jobs=1 to prevent |
| 111 | + # oversubscription (outer loop parallel * inner loop parallel). |
| 112 | + if multiprocessing.current_process().daemon: |
| 113 | + self.global_params.grid_n_jobs = 1 |
| 114 | + grid_n_jobs = 1 |
| 115 | + else: |
| 116 | + grid_n_jobs = self.global_params.grid_n_jobs |
106 | 117 |
|
107 | 118 | # Configure GPU usage and job limits for specific models |
108 | 119 | is_gpu_model = ( |
109 | 120 | "keras" in method_name.lower() |
110 | 121 | or "xgb" in method_name.lower() |
111 | 122 | or "catboost" in method_name.lower() |
112 | 123 | ) |
| 124 | + |
| 125 | + global _TF_INITIALIZED |
113 | 126 | if is_gpu_model: |
114 | 127 | grid_n_jobs = 1 |
115 | | - try: |
116 | | - gpu_devices = tf.config.experimental.list_physical_devices("GPU") |
117 | | - if gpu_devices: |
118 | | - for device in gpu_devices: |
119 | | - tf.config.experimental.set_memory_growth(device, True) |
120 | | - else: |
121 | | - # Explicitly set CPU as the visible device for TensorFlow to avoid CUDA init errors |
122 | | - tf.config.set_visible_devices([], "GPU") |
123 | | - except Exception as e: |
124 | | - self.logger.warning(f"Could not configure GPU for TensorFlow: {e}") |
| 128 | + # --- OPTIMIZATION: One-time TF/GPU Setup --- |
| 129 | + if not _TF_INITIALIZED: |
| 130 | + try: |
| 131 | + gpu_devices = tf.config.experimental.list_physical_devices("GPU") |
| 132 | + if gpu_devices: |
| 133 | + for device in gpu_devices: |
| 134 | + try: |
| 135 | + tf.config.experimental.set_memory_growth(device, True) |
| 136 | + except RuntimeError: |
| 137 | + pass # Memory growth must be set before GPUs have been initialized |
| 138 | + else: |
| 139 | + # No GPU detected: hide all GPU devices from TensorFlow so it runs on CPU only and avoids CUDA init errors |
| 140 | + tf.config.set_visible_devices([], "GPU") |
| 141 | + except Exception as e: |
| 142 | + self.logger.warning(f"Could not configure GPU for TensorFlow: {e}") |
| 143 | + finally: |
| 144 | + _TF_INITIALIZED = True |
125 | 145 |
|
126 | 146 | self.metric_list = self.global_params.metric_list |
127 | 147 |
|
|
0 commit comments