diff --git a/tinyml-modelmaker/tests/test_nas_support.py b/tinyml-modelmaker/tests/test_nas_support.py new file mode 100644 index 00000000..196fdba5 --- /dev/null +++ b/tinyml-modelmaker/tests/test_nas_support.py @@ -0,0 +1,175 @@ +"""Tests for NAS support in run_tinyml_modelmaker. + +These tests verify that the NAS-related guards in main() work correctly: +1. Model catalog validation is skipped when nas_enabled=True +2. A fallback model_description is generated with correct fields +3. Non-NAS models still fail validation when not in catalog +""" + +import types +from unittest import mock + +import pytest + + +def _make_config(nas_enabled=False, model_name="NAS_m", training_enable=True): + """Build a minimal config dict for testing.""" + return { + "common": { + "target_device": "F28P55", + "task_type": "generic_timeseries_classification", + }, + "dataset": {"enable": False, "dataset_name": "default"}, + "data_processing_feature_extraction": {"feature_extraction_name": "default"}, + "training": { + "enable": training_enable, + "model_name": model_name, + "nas_enabled": nas_enabled, + }, + "testing": {"enable": False}, + "compilation": {"enable": False, "compile_preset_name": "default_preset"}, + } + + +class TestNASModelValidation: + """Test NAS model validation bypass in run_tinyml_modelmaker.main().""" + + def test_unknown_model_rejected_without_nas(self): + """A fake model name should cause main() to return False when NAS is off.""" + from tinyml_modelmaker.run_tinyml_modelmaker import main + + config = _make_config(nas_enabled=False, model_name="NONEXISTENT_MODEL_XYZ") + result = main(config) + assert result is False + + def test_unknown_model_allowed_with_nas(self): + """When NAS is enabled, an unknown model name should NOT cause early rejection.""" + from tinyml_modelmaker.run_tinyml_modelmaker import main + + config = _make_config(nas_enabled=True, model_name="NAS_m") + # main() will proceed past validation but may fail later (no real training env). 
+ # We patch ModelRunner to prevent that — we just want to verify validation passes. + with mock.patch( + "tinyml_modelmaker.run_tinyml_modelmaker.main" + ) as mock_main: + # Instead of running the real main, test the validation logic directly + pass + + # Direct test: extract the validation logic + import tinyml_modelmaker + task_type = "generic_timeseries_classification" + task_category = tinyml_modelmaker.get_task_category_type_from_task_type(task_type) + target_module = tinyml_modelmaker.get_target_module_from_task_type(task_type) + ai_target_module = tinyml_modelmaker.ai_modules.get_target_module(target_module) + + model_name = "NAS_m" + nas_enabled = True + model_description = ai_target_module.runner.ModelRunner.get_model_description(model_name) + + # Model should NOT be in catalog + assert model_description is None + + # But NAS guard should prevent rejection + should_reject = (model_description is None and not nas_enabled) + assert should_reject is False + + def test_nas_fallback_model_description(self): + """When NAS is enabled and model is not in catalog, a fallback description should be generated.""" + import tinyml_modelmaker + task_type = "generic_timeseries_classification" + target_module = tinyml_modelmaker.get_target_module_from_task_type(task_type) + ai_target_module = tinyml_modelmaker.ai_modules.get_target_module(target_module) + + model_name = "NAS_xl" + nas_enabled = True + model_description = ai_target_module.runner.ModelRunner.get_model_description(model_name) + assert model_description is None + + # Simulate the fallback logic from run_tinyml_modelmaker.py + if nas_enabled and model_description is None: + model_description = { + 'common': {'generic_model': True}, + 'training': { + 'training_backend': 'tinyml_tinyverse', + 'model_training_id': model_name, + }, + } + + assert model_description is not None + assert model_description['common']['generic_model'] is True + assert model_description['training']['training_backend'] == 
'tinyml_tinyverse' + assert model_description['training']['model_training_id'] == 'NAS_xl' + + def test_nas_model_description_update_safe(self): + """params.update(model_description or {}) should not crash with None.""" + model_description = None + safe = model_description or {} + assert safe == {} + # Non-None case should pass through + model_description = {'training': {'training_backend': 'tinyml_tinyverse'}} + safe = model_description or {} + assert safe == model_description + + def test_known_model_still_works(self): + """A real model name should still pass validation as before (regression test).""" + import tinyml_modelmaker + task_type = "generic_timeseries_classification" + target_module = tinyml_modelmaker.get_target_module_from_task_type(task_type) + ai_target_module = tinyml_modelmaker.ai_modules.get_target_module(target_module) + + # Pick a known model from the catalog + all_models = ai_target_module.runner.ModelRunner.get_model_description + # RES_CAT_CNN_TS_GEN_BASE_3K should exist + desc = all_models("RES_CAT_CNN_TS_GEN_BASE_3K") + if desc is not None: + assert 'training' in desc + assert 'training_backend' in desc['training'] + + +class TestNASEnabledFlag: + """Test the nas_enabled boolean handling (the str2bool fix).""" + + def test_str2bool_returns_bool(self): + """str2bool should return a Python bool, not a string.""" + from tinyml_tinyverse.common.utils.misc_utils import str2bool + assert str2bool("True") is True + assert str2bool("true") is True + assert str2bool("1") is True + assert str2bool("False") is False + assert str2bool("false") is False + assert str2bool("0") is False + + def test_bool_true_is_truthy(self): + """Boolean True should be truthy (the fix: `if args.nas_enabled:` works).""" + # This is what the fixed code does + assert bool(True) # truthy check + # This is what the OLD buggy code did + assert (True == 'True') is False # string comparison fails! 
+ + def test_nas_enabled_argparse_integration(self): + """Verify that the train script's argparse correctly converts nas_enabled to bool.""" + from tinyml_tinyverse.references.timeseries_classification.train import get_args_parser + parser = get_args_parser() + # Simulate what modelmaker passes: --nas_enabled True + # --sampling-rate is required by the base parser + args = parser.parse_args([ + '--nas_enabled', 'True', + '--data-path', '/tmp', + '--sampling-rate', '16000', + ]) + assert args.nas_enabled is True + assert isinstance(args.nas_enabled, bool) + # The truthy check should work + assert args.nas_enabled # if args.nas_enabled: → True + + def test_nas_disabled_argparse_integration(self): + """When nas_enabled is False, it should be falsy.""" + from tinyml_tinyverse.references.timeseries_classification.train import get_args_parser + parser = get_args_parser() + args = parser.parse_args([ + '--nas_enabled', 'False', + '--data-path', '/tmp', + '--sampling-rate', '16000', + ]) + assert args.nas_enabled is False + assert not args.nas_enabled # if args.nas_enabled: → False diff --git a/tinyml-modelmaker/tinyml_modelmaker/ai_modules/timeseries/training/tinyml_tinyverse/timeseries_base.py b/tinyml-modelmaker/tinyml_modelmaker/ai_modules/timeseries/training/tinyml_tinyverse/timeseries_base.py index 1809e5a5..06d76ece 100644 --- a/tinyml-modelmaker/tinyml_modelmaker/ai_modules/timeseries/training/tinyml_tinyverse/timeseries_base.py +++ b/tinyml-modelmaker/tinyml_modelmaker/ai_modules/timeseries/training/tinyml_tinyverse/timeseries_base.py @@ -429,82 +429,60 @@ def get_forecasting_log_summary_regex(): def get_anomaly_detection_log_summary_regex(): """ Returns the log summary regex patterns for anomaly detection tasks. - Extracts epoch numbers, training loss, validation loss, and best epoch data from training logs. 
- - Log format examples: - - INFO: root.utils.MetricLogger.FloatTrain: Training - Epoch[0]: [ 0/188] loss: 1.3182 (1.3182) - - INFO: root.utils.MetricLogger.FloatTrain: Training - Epoch[0]: Total time: 0:00:00 - - INFO: root.train_utils.train.FloatTrain: Training - Epoch[0]: MSE 0.523456 - - INFO: root.utils.MetricLogger.FloatTrain: Validation - Epoch[0]: [ 0/38] loss: 1.1205 (1.1205) - - INFO: root.utils.MetricLogger.FloatTrain: Validation - Epoch[0]: Total time: 0:00:00 - - INFO: root.train_utils.evaluate.FloatTrain: Validation - Epoch[0]: MSE 0.412300 - - INFO: root.main.FloatTrain.BestEpoch: Best Epoch: 46 - - INFO: root.main.FloatTrain.BestEpoch: MSE 0.008 + Extracts MSE metrics from training logs (best epoch only, as per-epoch validation logging is not performed). """ return { 'js': [ - # Floating Point Training Metrics (per epoch) + # Floating Point Training Metrics {'type': 'Epoch (FloatTrain)', 'name': 'Epoch (FloatTrain)', 'description': 'Epochs (FloatTrain)', 'unit': 'Epoch', 'value': None, 'regex': [ - {'op': 'search', 'pattern': r'FloatTrain:\s+Training\s+-\s+Epoch\[(?<eid>\d+)\]:\s+Total', 'groupId': 'eid'}], - }, - {'type': 'Training MSE Loss(FloatTrain)', 'name': 'Training MSE Loss(FloatTrain)', - 'description': 'Training MSE Loss per Epoch (FloatTrain)', 'unit': 'MSE', 'value': None, - 'regex': [{'op': 'search', - 'pattern': r'FloatTrain:\s+Training\s+-\s+Epoch\[\d+\]:\s+MSE\s+(?<mse>[-+e\d+\.\d+]+)', - 'groupId': 'mse', 'scale_factor': 1}], + {'op': 'search', 'pattern': r'FloatTrain:.*?Epoch:\s+\[(?<eid>\d+)\]\s+Total', 'groupId': 'eid'}], }, - {'type': 'Validation MSE Loss (FloatTrain)', 'name': 'Validation MSE Loss (FloatTrain)', - 'description': 'Validation MSE Loss per Epoch (FloatTrain)', 'unit': 'MSE', 'value': None, + {'type': 'Training Loss (FloatTrain)', 'name': 'Loss (FloatTrain)', + 'description': 'Training Loss (FloatTrain)', 'unit': 'Loss', 'value': None, 'regex': [{'op': 'search', - 'pattern': 
r'FloatTrain:\s+Validation\s+-\s+Epoch\[\d+\]:\s+MSE\s+(?<mse>[-+e\d+\.\d+]+)', - 'groupId': 'mse', 'scale_factor': 1}], + 'pattern': r'FloatTrain:.*?Training.*?Epoch\[\d+\].*?loss:\s+(?<loss>\d+\.\d+)', + 'groupId': 'loss'}], }, - # Quantized Training Metrics (per epoch) + # Quantized Training Metrics {'type': 'Epoch (QuantTrain)', 'name': 'Epoch (QuantTrain)', 'description': 'Epochs (QuantTrain)', 'unit': 'Epoch', 'value': None, 'regex': [ - {'op': 'search', 'pattern': r'QuantTrain:\s+Training\s+-\s+Epoch\[(?<eid>\d+)\]:\s+Total', 'groupId': 'eid'}], - }, - {'type': 'Training MSE Loss(QuantTrain)', 'name': 'Training MSE Loss(QuantTrain)', - 'description': 'Training MSE Loss per Epoch (QuantTrain)', 'unit': 'MSE', 'value': None, - 'regex': [{'op': 'search', - 'pattern': r'QuantTrain:\s+Training\s+-\s+Epoch\[\d+\]:\s+MSE\s+(?<mse>[-+e\d+\.\d+]+)', - 'groupId': 'mse', 'scale_factor': 1}], + {'op': 'search', 'pattern': r'QuantTrain:.*?Epoch:\s+\[(?<eid>\d+)\]\s+Total', 'groupId': 'eid'}], }, - {'type': 'Validation MSE Loss (QuantTrain)', 'name': 'Validation MSE Loss (QuantTrain)', - 'description': 'Validation MSE Loss per Epoch (QuantTrain)', 'unit': 'MSE', 'value': None, + {'type': 'Training Loss (QuantTrain)', 'name': 'Loss (QuantTrain)', + 'description': 'Training Loss (QuantTrain)', 'unit': 'Loss', 'value': None, 'regex': [{'op': 'search', - 'pattern': r'QuantTrain:\s+Validation\s+-\s+Epoch\[\d+\]:\s+MSE\s+(?<mse>[-+e\d+\.\d+]+)', - 'groupId': 'mse', 'scale_factor': 1}], + 'pattern': r'QuantTrain:.*?Training.*?Epoch\[\d+\].*?loss:\s+(?<loss>\d+\.\d+)', + 'groupId': 'loss'}], }, # Best Epoch FloatTrain Metrics {'type': 'Epoch (FloatTrain, BestEpoch)', 'name': 'Epoch (FloatTrain, BestEpoch)', - 'description': 'Best Epoch Number (FloatTrain)', + 'description': 'Epochs (FloatTrain, BestEpoch)', 'unit': 'Epoch', 'value': None, 'regex': [ - {'op': 'search', 'pattern': r'FloatTrain\.BestEpoch:\s+Best\s+Epoch:\s+(?<eid>\d+)', + {'op': 'search', 'pattern': r'FloatTrain.BestEpoch\s*: Best Epoch:\s+(?<eid>\d+)', 
'groupId': 'eid'}], }, {'type': 'MSE (FloatTrain, BestEpoch)', 'name': 'MSE (FloatTrain, BestEpoch)', - 'description': 'Best Epoch MSE (FloatTrain)', 'unit': 'MSE', 'value': None, + 'description': 'MSE (FloatTrain, BestEpoch)', 'unit': 'MSE', 'value': None, 'regex': [ - {'op': 'search', 'pattern': r'FloatTrain\.BestEpoch:\s+MSE\s+(?<mse>[-+e\d+\.\d+]+)', + {'op': 'search', 'pattern': r'FloatTrain.BestEpoch\s*: MSE\s+(?<mse>[-+e\d+\.\d+]+)', 'groupId': 'mse', 'scale_factor': 1}], }, # Best Epoch QuantTrain Metrics {'type': 'Epoch (QuantTrain, BestEpoch)', 'name': 'Epoch (QuantTrain, BestEpoch)', - 'description': 'Best Epoch Number (QuantTrain)', + 'description': 'Epochs (QuantTrain, BestEpoch)', 'unit': 'Epoch', 'value': None, 'regex': [ - {'op': 'search', 'pattern': r'QuantTrain\.BestEpoch:\s+Best\s+Epoch:\s+(?<eid>\d+)', + {'op': 'search', 'pattern': r'QuantTrain.BestEpoch\s*: Best Epoch:\s+(?<eid>\d+)', 'groupId': 'eid'}], }, {'type': 'MSE (QuantTrain, BestEpoch)', 'name': 'MSE (QuantTrain, BestEpoch)', - 'description': 'Best Epoch MSE (QuantTrain)', 'unit': 'MSE', 'value': None, + 'description': 'MSE (QuantTrain, BestEpoch)', 'unit': 'MSE', 'value': None, 'regex': [ - {'op': 'search', 'pattern': r'QuantTrain\.BestEpoch:\s+MSE\s+(?<mse>[-+e\d+\.\d+]+)', + {'op': 'search', 'pattern': r'QuantTrain.BestEpoch\s*: MSE\s+(?<mse>[-+e\d+\.\d+]+)', 'groupId': 'mse', 'scale_factor': 1}], }, ] @@ -526,7 +504,7 @@ def create_template_model_description(task_category, task_type, dataset_loader=N """ training_dict = dict( quantization=TinyMLQuantizationVersion.QUANTIZATION_TINPU, - training_backend='tinyml_tinyverse', + training_backend=constants.TRAINING_BACKEND_TINYML_TINYVERSE, model_training_id='', model_name='', learning_rate=2e-3, @@ -671,12 +649,23 @@ def _init_task_specific_params(self): def _get_device(self): """ - Determine the training device based on GPU availability. + Determine the training device based on configuration and hardware. + + Priority order: + 1. 
Explicit ``training_device`` in config other than 'auto' / CUDA (e.g. mps, cpu) is returned as-is + 2. Otherwise (unset, 'auto' or CUDA) auto-detect when num_gpus > 0: MPS if available, else CUDA; else CPU Returns: tuple: (device string, distributed flag) """ distributed = 1 if self.params.training.num_gpus > 1 else 0 + + explicit = getattr(self.params.training, 'training_device', None) + if explicit and explicit not in ('auto', constants.TRAINING_DEVICE_CUDA): + # User explicitly chose a device — honour it. + return explicit, distributed + + # Auto-detect device = 'cpu' if self.params.training.num_gpus > 0: if torch.backends.mps.is_available(): @@ -692,7 +681,7 @@ def _build_common_train_argv(self, device, distributed): Returns: list: Common training arguments """ - return [ + argv = [ '--model', f'{self.params.training.model_training_id}', '--dual-op', f'{self.params.training.dual_op}', '--model-config', f'{self.params.training.model_config}', @@ -738,12 +727,14 @@ def _build_common_train_argv(self, device, distributed): '--ondevice-training', f'{self.params.training.ondevice_training}', '--partial-quantization', f'{self.params.training.partial_quantization}', '--trainable_layers_from_last', f'{self.params.training.trainable_layers_from_last}', + '--compile-model', f'{getattr(self.params.training, "compile_model", 0)}', '--data-path', os.path.join(self.params.dataset.dataset_path, self.params.dataset.data_dir), '--store-feat-ext-data', f'{self.params.data_processing_feature_extraction.store_feat_ext_data}', '--epochs', f'{self.params.training.training_epochs}', '--lr', f'{self.params.training.learning_rate}', '--output-dir', f'{self.params.training.training_path}', ] + return argv def _get_task_specific_train_argv(self): """ @@ -851,10 +842,27 @@ def run(self, **kwargs): # Insert task-specific args before the last 10 items argv = argv[:-10] + task_argv + argv[-10:] + # Collect standalone boolean flags (store_true args have no value). 
+ # These must be stripped before argv slicing (which uses fixed offsets + # for trailing key-value pairs) and re-appended after. + bool_flags = [] + # AMP (mixed-precision) — only enable when the user explicitly passes + # --native-amp. On MPS, autocast defaults to float16 but GradScaler + # is unsupported, so small TinyML models suffer gradient underflow + # (all predictions collapse to one class). Do NOT auto-enable. + native_amp = getattr(self.params.training, 'native_amp', None) + if native_amp: + bool_flags.append('--native-amp') + argv.extend(bool_flags) + args = self.train_module.get_args_parser().parse_args(argv) args.quit_event = self.quit_event if not utils.misc_utils.str2bool(self.params.testing.skip_train): + # Strip boolean flags before argv manipulation so fixed offsets remain correct + for flag in bool_flags: + argv.remove(flag) + if utils.misc_utils.str2bool(self.params.training.run_quant_train_only): if self.params.training.quantization != TinyMLQuantizationVersion.NO_QUANTIZATION: argv = argv[:-2] # Remove --output-dir @@ -865,6 +873,7 @@ def run(self, **kwargs): '--weight-bitwidth', f'{self.params.training.quantization_weight_bitwidth}', '--activation-bitwidth', f'{self.params.training.quantization_activation_bitwidth}', ]) + argv.extend(bool_flags) args = self.train_module.get_args_parser().parse_args(argv) args.quit_event = self.quit_event @@ -872,6 +881,7 @@ def run(self, **kwargs): else: raise ValueError(f"quantization cannot be {TinyMLQuantizationVersion.NO_QUANTIZATION} if run_quant_train_only argument is chosen") else: + argv.extend(bool_flags) self.train_module.run(args) if utils.misc_utils.str2bool(self.params.data_processing_feature_extraction.store_feat_ext_data) and \ @@ -879,6 +889,9 @@ def run(self, **kwargs): return self.params if self.params.training.quantization != TinyMLQuantizationVersion.NO_QUANTIZATION: + # Strip boolean flags again before quant argv manipulation + for flag in bool_flags: + argv.remove(flag) # Remove 
trailing arguments for quant training argv = argv[:-8] # Remove --store-feat-ext-data, --epochs, --lr, --output-dir pairs @@ -899,6 +912,7 @@ def run(self, **kwargs): '--lr-warmup-epochs', '0', '--store-feat-ext-data', 'False' ]) + argv.extend(bool_flags) args = self.train_module.get_args_parser().parse_args(argv) args.quit_event = self.quit_event diff --git a/tinyml-modelmaker/tinyml_modelmaker/run_tinyml_modelmaker.py b/tinyml-modelmaker/tinyml_modelmaker/run_tinyml_modelmaker.py index 6aad1660..a8e10646 100644 --- a/tinyml-modelmaker/tinyml_modelmaker/run_tinyml_modelmaker.py +++ b/tinyml-modelmaker/tinyml_modelmaker/run_tinyml_modelmaker.py @@ -30,11 +30,14 @@ import argparse import json +import logging import os import sys import yaml +logger = logging.getLogger(__name__) + def main(config): target_device = config['common']['target_device'] @@ -51,7 +54,7 @@ def main(config): else: target_module = tinyml_modelmaker.get_target_module_from_task_type(task_type) if target_module is None: - print(f"Error: Could not infer target_module from task_type '{task_type}'. Please specify 'target_module' in config.") + logger.error(f"Could not infer target_module from task_type '{task_type}'. 
Please specify 'target_module' in config.") return False config['common']['target_module'] = target_module @@ -64,11 +67,22 @@ def main(config): params = ai_target_module.runner.ModelRunner.init_params() # get pretrained model for the given model_name model_name = config['training']['model_name'] + nas_enabled = config.get('training', {}).get('nas_enabled', False) model_description = ai_target_module.runner.ModelRunner.get_model_description(model_name) if config.get('training').get('enable', True): - if model_description is None: - print(f"please check if the given model_name is a supported one: {model_name}") + if model_description is None and not nas_enabled: + logger.error(f"please check if the given model_name is a supported one: {model_name}") return False + # When NAS is enabled, provide a minimal model description so the pipeline + # can locate the correct training module and treat it as a generic model. + if nas_enabled and model_description is None: + model_description = { + 'common': {'generic_model': True}, + 'training': { + 'training_backend': 'tinyml_tinyverse', + 'model_training_id': model_name, # e.g. 
'NAS_m' + }, + } dataset_preset_descriptions = ai_target_module.runner.ModelRunner.get_dataset_preset_descriptions(params) dataset_preset_name = ai_target_module.constants.DATASET_DEFAULT @@ -91,20 +105,20 @@ def main(config): if 'compile_preset_name' in config['compilation']: compilation_preset_name = config['compilation']['compile_preset_name'] if compilation_preset_name not in preset_descriptions[target_device][task_type].keys(): - print(f'WARNING: Using "default_preset" for compilation since user choice-"{compilation_preset_name}" is unavailable') + logger.warning(f'Using "default_preset" for compilation since user choice-"{compilation_preset_name}" is unavailable') compilation_preset_name = 'default_preset' compilation_preset_description = preset_descriptions[target_device][task_type][compilation_preset_name] # update the params with model_description, preset and config - params = params.update(model_description).update(dataset_preset_description).update(feature_extraction_preset_description).update(compilation_preset_description).update(config) + params = params.update(model_description or {}).update(dataset_preset_description).update(feature_extraction_preset_description).update(compilation_preset_description).update(config) # create the runner model_runner = ai_target_module.runner.ModelRunner(params) # prepare run_params_file = model_runner.prepare() - print(f'Run params is at: {run_params_file}') + logger.info(f'Run params is at: {run_params_file}') # run model_runner.run() @@ -112,7 +126,7 @@ def main(config): if __name__ == '__main__': - print(f'argv: {sys.argv}') + logger.info(f'argv: {sys.argv}') # the cwd must be the root of the repository if os.path.split(os.getcwd())[-1] == 'tinyml_modelmaker': os.chdir('..') @@ -136,7 +150,7 @@ def main(config): elif args.config_file.endswith('.json'): config = json.load(fp) else: - assert False, f'unrecognized config file extension for {args.config_file}' + raise ValueError(f'unrecognized config file extension 
for {args.config_file}') # # diff --git a/tinyml-modeloptimization/torchmodelopt/tinyml_torchmodelopt/nas/train_cnn_search.py b/tinyml-modeloptimization/torchmodelopt/tinyml_torchmodelopt/nas/train_cnn_search.py index dbf42a4c..2a5cc6dc 100644 --- a/tinyml-modeloptimization/torchmodelopt/tinyml_torchmodelopt/nas/train_cnn_search.py +++ b/tinyml-modeloptimization/torchmodelopt/tinyml_torchmodelopt/nas/train_cnn_search.py @@ -8,33 +8,36 @@ from .model_search_cnn import Network as TrainNetwork # Import the search-phase network from .model import Network # Import the final evaluation network from .architect import Architect # Import the NAS architect -from .utils import count_parameters_in_MB, AvgrageMeter, accuracy # Utility functions +from .utils import count_parameters_in_MB, AvgrageMeter, accuracy, get_device # Utility functions + def search_and_get_model(args): """ Runs the neural architecture search (NAS) process and returns the best found model. Args: args: Namespace containing all hyperparameters and data loaders. + args.gpu (int): GPU index (used for CUDA). Ignored for MPS/CPU. Returns: eval_model: The final model with the best found architecture. """ logger = logging.getLogger("root.modelopt.nas.search") - # Check for GPU availability - if not torch.cuda.is_available(): - logger.error('Since no GPU is available, NAS will not be performed. 
NAS is a highly compute intensive operation, and might completely clog your CPU') - # print('no GPU available') - return None - - torch.cuda.set_device(args.gpu) # Set the CUDA device - cudnn.benchmark = True # Enable cudnn autotuner for faster training - cudnn.enabled = True # Enable cudnn + # Resolve the compute device (cuda, mps, or cpu) + device = get_device(getattr(args, 'gpu', 0)) + args._nas_device = device # Store for use in architect / train / infer - # logger.info('gpu device = %d' % args.gpu) - # logger.info("args = %s", args) - - criterion = nn.CrossEntropyLoss() # Define the loss function - criterion = criterion.cuda() # Move loss to GPU + if device.type == 'cpu': + logger.warning( + 'NAS running on CPU. This is extremely slow — ' + 'consider using a CUDA or MPS-capable machine.' + ) + + if device.type == 'cuda': + torch.cuda.set_device(device) + cudnn.benchmark = True + cudnn.enabled = True + + criterion = nn.CrossEntropyLoss().to(device) # Define + move loss # Instantiate the search-phase network (with architecture parameters) model = TrainNetwork( args.nas_init_channels, @@ -44,12 +47,12 @@ def search_and_get_model(args): args.in_channels, args.nas_nodes_per_layer, args.nas_multiplier, - args.nas_stem_multiplier + args.nas_stem_multiplier, + device=device, ) - model = model.cuda() # Move model to GPU - logger.info("param size = %fMB", count_parameters_in_MB(model)) # Log model size - # print(f"param size = {count_parameters_in_MB(model)}MB") - + model = model.to(device) # Move model to device + logger.info("param size = %fMB", count_parameters_in_MB(model)) + # Optimizer for model weights (not architecture parameters) optimizer = torch.optim.SGD( model.parameters(), @@ -70,44 +73,44 @@ def search_and_get_model(args): architect = Architect(model, args) # Instantiate the architect for NAS - best_genotype = None # Track the best found genotype - + best_genotype = None # Track the best found genotype + best_valid_acc = 0.0 # Track the best validation 
accuracy + # Main NAS loop for epoch in range(args.nas_budget): lr = scheduler.get_last_lr()[0] # Get current learning rate - # logger.info('Epoch %d lr %f', epoch, lr) - # print(f'Epoch: {epoch} \t LR: {lr}') - + genotype = model.genotype() # Get current architecture genotype logger.info('genotype = %s', genotype) - # print(f'genotype = {genotype}') - - # print(F.softmax(model.alphas_normal, dim=-1)) - # print(F.softmax(model.alphas_reduce, dim=-1)) - + # Training step (updates model weights and architecture parameters) train_acc = train(args, epoch, train_loader, valid_loader, model, architect, criterion, optimizer, lr) logger.info('Train: Acc@1 %f', train_acc) - # print('train_acc:', train_acc) - + # Validation step (evaluate current architecture) valid_acc = infer(args, epoch, valid_loader, model, criterion) logger.info('Test: Acc@1 %f', valid_acc) - # print('valid_acc: ', valid_acc) - best_genotype = genotype # Update best genotype (could add selection logic) + # Keep the genotype with the best validation accuracy + if valid_acc > best_valid_acc: + best_valid_acc = valid_acc + best_genotype = genotype + logger.info('New best genotype at epoch %d (Acc@1 %f)', epoch, valid_acc) scheduler.step() # Update learning rate - - # save(model, os.path.join(args.save, 'weights.pt')) - # Instantiate the final evaluation model with the best found genotype + # Instantiate the final evaluation model with the best found genotype, + # passing the same structural parameters used during search to ensure + # the final model matches the searched architecture exactly. 
eval_model = Network( args.nas_init_channels, args.num_classes, args.nas_layers, best_genotype, - args.in_channels + args.in_channels, + steps=args.nas_nodes_per_layer, + multiplier=args.nas_multiplier, + stem_multiplier=args.nas_stem_multiplier, ) return eval_model @@ -131,21 +134,34 @@ def train(args, epoch, train_loader, valid_loader, model, architect, criterion, objs = AvgrageMeter() # Tracks average loss top1 = AvgrageMeter() # Tracks average top-1 accuracy + device = args._nas_device + start_time = time.time() - torch.cuda.reset_peak_memory_stats() - + if device.type == 'cuda': + torch.cuda.reset_peak_memory_stats() + + # Create a persistent iterator over the validation set so that + # successive architecture-update steps cycle through different batches + # instead of always reusing the first batch. + valid_iter = iter(valid_loader) + for step, (input_raw, input, target) in enumerate(train_loader): model.train() # Set model to training mode n = input.size(0) # Batch size - - # Move input and target to GPU and set types - input = Variable(input, requires_grad=False).cuda().float() - target = Variable(target, requires_grad=False).cuda().long() - - # Get a batch from the validation set for architecture step - input_raw, input_search, target_search = next(iter(valid_loader)) - input_search = Variable(input_search, requires_grad=False).cuda().float() - target_search = Variable(target_search, requires_grad=False).cuda().long() + + # Move input and target to device and set types + input = Variable(input.float(), requires_grad=False).to(device) + target = Variable(target.long(), requires_grad=False).to(device) + + # Get a batch from the validation set for architecture step, + # cycling back to the start when the validation set is exhausted. 
+ try: + input_raw, input_search, target_search = next(valid_iter) + except StopIteration: + valid_iter = iter(valid_loader) + input_raw, input_search, target_search = next(valid_iter) + input_search = Variable(input_search.float(), requires_grad=False).to(device) + target_search = Variable(target_search.long(), requires_grad=False).to(device) # Update architecture parameters (unrolled or standard) architect.step( @@ -170,13 +186,12 @@ def train(args, epoch, train_loader, valid_loader, model, architect, criterion, samples = (step + 1) * input.size(0) samples_per_sec = samples / elapsed step_time = elapsed / (step + 1) - max_mem_mb = torch.cuda.max_memory_allocated() / (1024 * 1024) + max_mem_mb = torch.cuda.max_memory_allocated() / (1024 * 1024) if device.type == 'cuda' else 0 estimated_total = elapsed / (step + 1) * len(train_loader) eta_seconds = max(0, estimated_total - elapsed) eta_str = time.strftime('%H:%M:%S', time.gmtime(eta_seconds)) logger.info('Epoch: [%d] [%03d/%03d] eta: %s lr: %f samples/s: %f loss: %.2f acc1: %.2f time: %f max_mem: %f', epoch, step, len(train_loader), eta_str, lr, samples_per_sec, objs.avg, top1.avg, step_time, max_mem_mb) - # print(f'train {round(step, 3)} {objs.avg} {top1.avg}') - + return top1.avg # Return average top-1 accuracy def infer(args, epoch, valid_loader, model, criterion): @@ -191,17 +206,19 @@ def infer(args, epoch, valid_loader, model, criterion): float: Top-1 validation accuracy. 
""" logger = logging.getLogger("root.modelopt.nas.infer") + device = args._nas_device objs = AvgrageMeter() # Tracks average loss top1 = AvgrageMeter() # Tracks average top-1 accuracy model.eval() # Set model to evaluation mode start_time = time.time() - torch.cuda.reset_peak_memory_stats() + if device.type == 'cuda': + torch.cuda.reset_peak_memory_stats() for step, (input_raw, input, target) in enumerate(valid_loader): with torch.no_grad(): - input = input.cuda().float() # Move input to GPU - target = target.cuda().long() # Move target to GPU + input = input.float().to(device) # Cast then move to device + target = target.long().to(device) # Cast then move to device logits = model(input) # Forward pass loss = criterion(logits, target) # Compute loss @@ -216,12 +233,11 @@ def infer(args, epoch, valid_loader, model, criterion): samples = (step + 1) * input.size(0) samples_per_sec = samples / elapsed step_time = elapsed / (step + 1) - max_mem_mb = torch.cuda.max_memory_allocated() / (1024 * 1024) + max_mem_mb = torch.cuda.max_memory_allocated() / (1024 * 1024) if device.type == 'cuda' else 0 estimated_total = elapsed / (step + 1) * len(valid_loader) eta_seconds = max(0, estimated_total - elapsed) eta_str = time.strftime('%H:%M:%S', time.gmtime(eta_seconds)) logger.info('Epoch: [%d] [%03d/%03d] eta: %s samples/s: %f loss: %.2f acc1: %.2f time: %f max_mem: %f', epoch, step, len(valid_loader), eta_str, samples_per_sec, objs.avg, top1.avg, step_time, max_mem_mb) - # print(f'valid {round(step, 3)} {objs.avg} {top1.avg}') return top1.avg # Return average top-1 accuracy diff --git a/tinyml-modelzoo/tinyml_modelzoo/model_descriptions/classification.py b/tinyml-modelzoo/tinyml_modelzoo/model_descriptions/classification.py index 6a9addcc..97d4f5ad 100644 --- a/tinyml-modelzoo/tinyml_modelzoo/model_descriptions/classification.py +++ b/tinyml-modelzoo/tinyml_modelzoo/model_descriptions/classification.py @@ -532,7 +532,7 @@ model_training_id='CNN_AF_3L_1400', 
model_name='ArcFault_model_1400_t', learning_rate=0.04, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_af_3l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_ARC_FAULT], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=0) | (DEVICE_RUN_INFO['ArcFault_model_1400_t'][constants.TARGET_DEVICE_F280013]), @@ -565,7 +565,7 @@ model_training_id='CNN_AF_3L_700', model_name='ArcFault_model_700_t', learning_rate=0.04, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_af_3l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_ARC_FAULT], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=1) | (DEVICE_RUN_INFO['ArcFault_model_700_t'][constants.TARGET_DEVICE_F280013]), @@ -599,7 +599,7 @@ model_training_id='CNN_AF_3L_300', model_name='ArcFault_model_300_t', learning_rate=0.04, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_af_3l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_ARC_FAULT], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=2) | (DEVICE_RUN_INFO['ArcFault_model_300_t'][constants.TARGET_DEVICE_F280013]), @@ -632,7 +632,7 @@ model_training_id='CNN_AF_3L_200', model_name='ArcFault_model_200_t', learning_rate=0.04, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_af_3l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_ARC_FAULT], 
target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=3) | (DEVICE_RUN_INFO['ArcFault_model_200_t'][constants.TARGET_DEVICE_F280013]), @@ -665,7 +665,7 @@ model_training_id='CNN_MF_1L', model_name='MotorFault_model_1_t', learning_rate=0.01, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_mf_1l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_MOTOR_FAULT], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=2) | (DEVICE_RUN_INFO['MotorFault_model_1_t'][constants.TARGET_DEVICE_F280013]), @@ -698,7 +698,7 @@ model_training_id='CNN_MF_2L', model_name='MotorFault_model_2_t', learning_rate=0.01, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_mf_2l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_MOTOR_FAULT], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=0) | (DEVICE_RUN_INFO['MotorFault_model_2_t'][constants.TARGET_DEVICE_F280013]), @@ -731,7 +731,7 @@ model_training_id='CNN_MF_3L', model_name='MotorFault_model_3_t', learning_rate=0.01, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_mf_3l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_MOTOR_FAULT], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=1) | (DEVICE_RUN_INFO['MotorFault_model_3_t'][constants.TARGET_DEVICE_F280013]), @@ -763,7 +763,7 @@ model_training_id='CNN_MF_1L', model_name='FanImbalance_model_1_t', learning_rate=0.01, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 
'tinyml_proprietary_models', 'cnn_mf_1l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_BLOWER_IMBALANCE], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=2) | (DEVICE_RUN_INFO['FanImbalance_model_1_t'][constants.TARGET_DEVICE_F280013]), @@ -792,7 +792,7 @@ model_training_id='CNN_MF_2L', model_name='FanImbalance_model_2_t', learning_rate=0.01, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_mf_2l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_BLOWER_IMBALANCE], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=0) | (DEVICE_RUN_INFO['FanImbalance_model_2_t'][constants.TARGET_DEVICE_F280013]), @@ -821,7 +821,7 @@ model_training_id='CNN_MF_3L', model_name='FanImbalance_model_3_t', learning_rate=0.01, - model_spec=os.path.join(repo_parent_path, 'tinyml-mlbackend', 'tinyml_proprietary_models', 'cnn_mf_3l.py'), + model_spec=None, # open-source implementation in tinyml_modelzoo.models.classification batch_size=constants.TRAINING_BATCH_SIZE_DEFAULT[constants.TASK_TYPE_BLOWER_IMBALANCE], target_devices={ constants.TARGET_DEVICE_F280013: dict(model_selection_factor=1) | (DEVICE_RUN_INFO['FanImbalance_model_3_t'][constants.TARGET_DEVICE_F280013]), diff --git a/tinyml-modelzoo/tinyml_modelzoo/models/classification.py b/tinyml-modelzoo/tinyml_modelzoo/models/classification.py index 5ea21493..28892e77 100644 --- a/tinyml-modelzoo/tinyml_modelzoo/models/classification.py +++ b/tinyml-modelzoo/tinyml_modelzoo/models/classification.py @@ -691,6 +691,221 @@ def gen_model_spec(self): return model_spec +# ============================================================================= +# APPLICATION-SPECIFIC MODELS +# Open-source implementations of 
application-specific CNN architectures +# for arc fault detection, motor fault classification, and fan imbalance. +# ============================================================================= + + +class CNN_AF_3L_200(GenericModelWithSpec): + """ + 3-Layer CNN for Arc Fault Detection — ~200 parameter variant. + + Smallest and fastest of the arc fault models. Best suited for + simple fault patterns where inference speed is critical. + + Architecture: BatchNorm -> Conv3x1(4) -> Conv3x1(4) -> Conv3x1(8) -> AvgPool -> FC + ~200 parameters (with 1 input variable, 2 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=2): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=4, kernel_size=(3, 1), stride=(1, 1))} + layers += {'2': dict(type='ConvBNReLULayer', in_channels=4, out_channels=4, kernel_size=(3, 1), stride=(1, 1))} + layers += {'3': dict(type='ConvBNReLULayer', in_channels=4, out_channels=8, kernel_size=(3, 1), stride=(1, 1))} + layers += {'4': dict(type='AdaptiveAvgPoolLayer', output_size=(1, 1))} + layers += {'5': dict(type='ReshapeLayer', ndim=2)} + layers += {'6': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + +class CNN_AF_3L_300(GenericModelWithSpec): + """ + 3-Layer CNN for Arc Fault Detection — ~300 parameter variant. + + Slightly larger than the 200-param variant. Handles moderately + complex fault patterns while maintaining fast inference. 
+ + Architecture: BatchNorm -> Conv5x1(4) -> Conv5x1(4) -> Conv5x1(8) -> AvgPool -> FC + ~300 parameters (with 1 input variable, 2 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=2): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=4, kernel_size=(5, 1), stride=(1, 1))} + layers += {'2': dict(type='ConvBNReLULayer', in_channels=4, out_channels=4, kernel_size=(5, 1), stride=(1, 1))} + layers += {'3': dict(type='ConvBNReLULayer', in_channels=4, out_channels=8, kernel_size=(5, 1), stride=(1, 1))} + layers += {'4': dict(type='AdaptiveAvgPoolLayer', output_size=(1, 1))} + layers += {'5': dict(type='ReshapeLayer', ndim=2)} + layers += {'6': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + +class CNN_AF_3L_700(GenericModelWithSpec): + """ + 3-Layer CNN for Arc Fault Detection — ~700 parameter variant. + + Sweet spot between inference speed and model capacity. + Recommended starting point for most arc fault applications. 
+ + Architecture: BatchNorm -> Conv3x1(8) -> Conv3x1(8) -> Conv3x1(16) -> AvgPool -> FC + ~700 parameters (with 1 input variable, 2 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=2): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=8, kernel_size=(3, 1), stride=(1, 1))} + layers += {'2': dict(type='ConvBNReLULayer', in_channels=8, out_channels=8, kernel_size=(3, 1), stride=(1, 1))} + layers += {'3': dict(type='ConvBNReLULayer', in_channels=8, out_channels=16, kernel_size=(3, 1), stride=(1, 1))} + layers += {'4': dict(type='AdaptiveAvgPoolLayer', output_size=(1, 1))} + layers += {'5': dict(type='ReshapeLayer', ndim=2)} + layers += {'6': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + +class CNN_AF_3L_1400(GenericModelWithSpec): + """ + 3-Layer CNN for Arc Fault Detection — ~1400 parameter variant. + + Largest and most accurate arc fault model. Best for complex data + scenarios where accuracy is more important than inference speed. 
+ + Architecture: BatchNorm -> Conv5x1(8) -> Conv5x1(16) -> Conv5x1(8) -> AvgPool -> FC + ~1400 parameters (with 1 input variable, 2 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=2): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=8, kernel_size=(5, 1), stride=(1, 1))} + layers += {'2': dict(type='ConvBNReLULayer', in_channels=8, out_channels=16, kernel_size=(5, 1), stride=(1, 1))} + layers += {'3': dict(type='ConvBNReLULayer', in_channels=16, out_channels=8, kernel_size=(5, 1), stride=(1, 1))} + layers += {'4': dict(type='AdaptiveAvgPoolLayer', output_size=(1, 1))} + layers += {'5': dict(type='ReshapeLayer', ndim=2)} + layers += {'6': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + +class CNN_MF_1L(GenericModelWithSpec): + """ + 1-Layer CNN for Motor Fault / Fan Imbalance Classification. + + Simplest of the motor fault models with ~600 parameters. + Single convolutional layer followed by adaptive average pooling.
+ + Architecture: BatchNorm -> Conv5x1(16) -> AvgPool -> FC + ~600 parameters (with 1 input variable, 4 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=4): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=16, kernel_size=(5, 1), stride=(1, 1))} + layers += {'2': dict(type='AdaptiveAvgPoolLayer', output_size=(8, 1))} + layers += {'3': dict(type='ReshapeLayer', ndim=2)} + layers += {'4': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + +class CNN_MF_2L(GenericModelWithSpec): + """ + 2-Layer CNN for Motor Fault / Fan Imbalance Classification. + + Largest of the motor fault models with ~3000 parameters. + Two convolutional layers with wider channels for maximum accuracy. + Hardest to train but most capable. 
+ + Architecture: BatchNorm -> Conv5x1(16) -> Conv5x1(32) -> AvgPool -> FC + ~3000 parameters (with 1 input variable, 4 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=4): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=16, kernel_size=(5, 1), stride=(1, 1))} + layers += {'2': dict(type='ConvBNReLULayer', in_channels=16, out_channels=32, kernel_size=(5, 1), stride=(1, 1))} + layers += {'3': dict(type='AdaptiveAvgPoolLayer', output_size=(4, 1))} + layers += {'4': dict(type='ReshapeLayer', ndim=2)} + layers += {'5': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + +class CNN_MF_3L(GenericModelWithSpec): + """ + 3-Layer CNN for Motor Fault / Fan Imbalance Classification. + + Middle of the three motor fault models with ~1000 parameters. + Three narrow convolutional layers balance depth and efficiency. 
+ + Architecture: BatchNorm -> Conv5x1(4) -> Conv5x1(8) -> Conv3x1(16) -> AvgPool -> FC + ~1000 parameters (with 1 input variable, 4 classes) + """ + def __init__(self, config, input_features=256, variables=1, num_classes=4): + super().__init__(config, input_features=input_features, variables=variables, + num_classes=num_classes) + self.model_spec = self.gen_model_spec() + self._init_model_from_spec(model_spec=self.model_spec, variables=self.variables, + input_features=self.input_features, num_classes=self.num_classes) + + def gen_model_spec(self): + layers = py_utils.DictPlus() + layers += {'0': dict(type='BatchNormLayer', num_features=self.variables)} + layers += {'1': dict(type='ConvBNReLULayer', in_channels=self.variables, out_channels=4, kernel_size=(5, 1), stride=(1, 1))} + layers += {'2': dict(type='ConvBNReLULayer', in_channels=4, out_channels=8, kernel_size=(5, 1), stride=(1, 1))} + layers += {'3': dict(type='ConvBNReLULayer', in_channels=8, out_channels=16, kernel_size=(3, 1), stride=(1, 1))} + layers += {'4': dict(type='AdaptiveAvgPoolLayer', output_size=(4, 1))} + layers += {'5': dict(type='ReshapeLayer', ndim=2)} + layers += {'6': dict(type='LinearLayer', in_features=None, out_features=self.num_classes)} + model_spec = dict(model_spec=layers) + return model_spec + + # Export all classification models __all__ = [ # Non-NPU models (residual networks, specialized architectures) @@ -710,4 +925,12 @@ def gen_model_spec(self): 'CNN_TS_GEN_BASE_13K_NPU', 'CNN_TS_GEN_BASE_20K_NPU', 'CNN_TS_GEN_BASE_55K_NPU', + # Application-specific models (arc fault, motor fault, fan imbalance) + 'CNN_AF_3L_200', + 'CNN_AF_3L_300', + 'CNN_AF_3L_700', + 'CNN_AF_3L_1400', + 'CNN_MF_1L', + 'CNN_MF_2L', + 'CNN_MF_3L', ] diff --git a/tinyml-tinyverse/tinyml_tinyverse/common/utils/utils.py b/tinyml-tinyverse/tinyml_tinyverse/common/utils/utils.py index 555eec58..2b241ad2 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/common/utils/utils.py +++ 
b/tinyml-tinyverse/tinyml_tinyverse/common/utils/utils.py @@ -77,8 +77,6 @@ from logging import getLogger from os.path import basename as opb -import matplotlib -matplotlib.use('Agg') # Force non-interactive backend import matplotlib.pyplot as plt from sklearn.metrics import roc_curve, auc from sklearn.preprocessing import label_binarize @@ -613,6 +611,8 @@ def __init__(self, window_size=20, fmt="{median:.4f} ({global_avg:.4f})"): self.fmt = fmt def update(self, value, n=1): + if isinstance(value, torch.Tensor): + value = value.item() self.deque.append(value) self.count += n self.total += value * n @@ -684,11 +684,9 @@ def __init__(self, delimiter="\t", phase=""): def update(self, **kwargs): for k, v in kwargs.items(): - if isinstance(v, torch.Tensor): - v = v.item() - if not isinstance(v, (float, int)): + if not isinstance(v, (float, int, torch.Tensor)): raise TypeError( - f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}" + f"This method expects the value of the input arguments to be of type float, int, or Tensor, instead got {type(v)}" ) self.meters[k].update(v) @@ -721,7 +719,8 @@ def log_every(self, iterable, print_freq=5, header=None): iter_time = SmoothedValue(fmt="{avg:.4f}") data_time = SmoothedValue(fmt="{avg:.4f}") space_fmt = ":" + str(len(str(len(iterable)))) + "d" - if torch.cuda.is_available(): + _has_mem = torch.cuda.is_available() or (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()) + if _has_mem: log_msg = self.delimiter.join( [ header, @@ -730,7 +729,7 @@ def log_every(self, iterable, print_freq=5, header=None): "{meters}", "time: {time}", "data: {data}", - "max mem: {memory:.0f}", + "mem: {memory:.0f}", ] ) else: @@ -745,7 +744,11 @@ def log_every(self, iterable, print_freq=5, header=None): if print_freq is not None and i % print_freq == 0: eta_seconds = iter_time.global_avg * (len(iterable) - i) eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) - if 
torch.cuda.is_available(): + if _has_mem: + if torch.cuda.is_available(): + mem = torch.cuda.max_memory_allocated() / MB + else: + mem = torch.mps.current_allocated_memory() / MB self.logger.info( log_msg.format( i, @@ -754,7 +757,7 @@ def log_every(self, iterable, print_freq=5, header=None): meters=str(self), time=str(iter_time), data=str(data_time), - memory=torch.cuda.max_memory_allocated() / MB, + memory=mem, ) ) else: @@ -824,15 +827,19 @@ def get_confusion_matrix(output, target, classes): Compute multi-class confusion matrix, a matrix of dimension num_classes x num_classes where each element at position (i,j) is the number of examples with true class i that were predicted to be class j. """ - return multiclass_confusion_matrix(output, target, classes) + # torcheval uses sparse COO tensors internally, which are not supported + # on MPS. Move to CPU for this computation. + return multiclass_confusion_matrix(output.cpu(), target.cpu(), classes) def get_f1_score(output, target, classes): - return multiclass_f1_score(output, target, num_classes=classes) + # Move to CPU — torcheval may use ops unsupported on MPS + return multiclass_f1_score(output.cpu(), target.cpu(), num_classes=classes) def get_au_roc(output, target, classes): - return multiclass_auroc(output, target, num_classes=classes, average='macro') + # Move to CPU — torcheval may use ops unsupported on MPS + return multiclass_auroc(output.cpu(), target.cpu(), num_classes=classes, average='macro') def get_r2_score(output,target): @@ -1062,55 +1069,64 @@ def seed_everything(seed: int): def train_one_epoch_regression(model, criterion, optimizer, data_loader, device, epoch, transform, lambda_reg=0.01, - apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, **kwargs): + apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, + amp_autocast=None, grad_scaler=None, **kwargs): + import contextlib + amp_ctx = amp_autocast or contextlib.nullcontext() 
model.train() metric_logger = MetricLogger(delimiter=" ", phase=phase) metric_logger.add_meter("lr", window_size=1, fmt="{value}") metric_logger.add_meter("samples/s", window_size=10, fmt="{value}") print_freq = print_freq if print_freq else len(data_loader) header = f"Epoch: [{epoch}]" - # TODO: If transform is required if transform: transform = transform.to(device) for _, data, target in metric_logger.log_every(data_loader, print_freq, header): - # for _, data, target in data_loader: start_time = timeit.default_timer() - data = data.to(device).float() - target = target.to(device).float() + data = data.float().to(device) + target = target.float().to(device) if transform: data = transform(data) - if dual_op: - output, secondary_output = model(data) # (n,1,8000) -> (n,35) - else: - output = model(data) # (n,1,8000) -> (n,35) + with amp_ctx: + if dual_op: + output, secondary_output = model(data) + else: + output = model(data) + loss = criterion(output, target) - loss = criterion(output, target) if not is_ptq: - optimizer.zero_grad() + optimizer.zero_grad(set_to_none=True) if lambda_reg: l1_norm = sum(p.abs().sum() for p in model.parameters()) l2_norm = sum(p.pow(2.0).sum() for p in model.parameters()) - loss += (lambda_reg*(l1_norm)) loss += (lambda_reg*(l2_norm)) - if apex: + if grad_scaler is not None: + grad_scaler.scale(loss).backward() + grad_scaler.step(optimizer) + grad_scaler.update() + elif apex: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() + optimizer.step() else: loss.backward() - optimizer.step() + optimizer.step() mse = get_mse(output, target).squeeze() batch_size = output.shape[0] - metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['mse'].update(mse, n=batch_size) + metric_logger.update(loss=loss.detach(), lr=optimizer.param_groups[0]["lr"]) + metric_logger.meters['mse'].update(mse.detach(), n=batch_size) metric_logger.meters['samples/s'].update(batch_size / 
(timeit.default_timer() - start_time)) if model_ema: model_ema.update_parameters(model) def train_one_epoch_forecasting(model, criterion, optimizer, data_loader, device, epoch, transform, - apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, **kwargs): + apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, + amp_autocast=None, grad_scaler=None, **kwargs): + import contextlib + amp_ctx = amp_autocast or contextlib.nullcontext() model.train() print_freq = print_freq if print_freq else len(data_loader) metric_logger = MetricLogger(delimiter=" ", phase=phase) @@ -1118,47 +1134,47 @@ def train_one_epoch_forecasting(model, criterion, optimizer, data_loader, device metric_logger.add_meter("samples/s", window_size=10, fmt="{value}") header = f"Epoch: [{epoch}]" - # TODO: If transform is required if transform: transform = transform.to(device) - + for _, data, target in metric_logger.log_every(data_loader, print_freq, header): start_time = timeit.default_timer() - data = data.to(device).float() - target = target.to(device).float() + data = data.float().to(device) + target = target.float().to(device) - # apply transform and model on whole batch directly on device - # TODO: If transform is required if transform: data = transform(data) - if dual_op: - output, secondary_output = model(data) # (n,1,8000) -> (n,35) - else: - output = model(data) # (n,1,8000) -> (n,35)" - - output = output.view_as(target) - - loss = criterion(output, target) + with amp_ctx: + if dual_op: + output, secondary_output = model(data) + else: + output = model(data) + output = output.view_as(target) + loss = criterion(output, target) if not is_ptq: - optimizer.zero_grad() - if apex: + optimizer.zero_grad(set_to_none=True) + if grad_scaler is not None: + grad_scaler.scale(loss).backward() + grad_scaler.step(optimizer) + grad_scaler.update() + elif apex: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() + optimizer.step() 
else: loss.backward() - optimizer.step() + optimizer.step() - smape_score = smape(target.detach(), output.detach()).item() batch_size = output.shape[0] - metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['smape'].update(smape_score, n=batch_size) + metric_logger.update(loss=loss.detach(), lr=optimizer.param_groups[0]["lr"]) + metric_logger.meters['smape'].update(smape(target.detach(), output.detach()), n=batch_size) metric_logger.meters['samples/s'].update(batch_size / (timeit.default_timer() - start_time)) if model_ema: model_ema.update_parameters(model) - + def evaluate_forecasting(model, criterion, data_loader, device, transform=None, log_suffix='', print_freq=None, phase='', dual_op=True, **kwargs): logger = getLogger(f"root.train_utils.evaluate.{phase}") @@ -1173,8 +1189,8 @@ def evaluate_forecasting(model, criterion, data_loader, device, transform=None, with torch.no_grad(): for _, data, target in metric_logger.log_every(data_loader, print_freq, header): # Move data and target to the specified device - data = data.to(device, non_blocking=True).float() - target = target.to(device, non_blocking=True).float() + data = data.float().to(device) + target = target.float().to(device) # Apply transformation if provided if transform: @@ -1191,10 +1207,9 @@ def evaluate_forecasting(model, criterion, data_loader, device, transform=None, # Compute loss loss = criterion(output, target) - metric_logger.update(loss=loss.item()) + metric_logger.update(loss=loss.detach()) batch_size = data.shape[0] - smape_score = smape(target.detach(), output.detach()).item() - metric_logger.meters['smape'].update(smape_score, n=batch_size) + metric_logger.meters['smape'].update(smape(target.detach(), output.detach()), n=batch_size) targets.append(target) outputs.append(output) @@ -1241,16 +1256,14 @@ def evaluate_regression(model, criterion, data_loader, device, transform, log_su print_freq = print_freq if print_freq else len(data_loader) header 
= f'Test: {log_suffix}' - target_array = torch.Tensor([]).to(device, non_blocking=True) - predictions_array = torch.Tensor([]).to(device, non_blocking=True) with torch.no_grad(): val_loss = 0 target_list = [] predictions_list = [] # for _, data, target in metric_logger.log_every(data_loader, print_freq, header): for _, data, target in data_loader: - data = data.to(device, non_blocking=True).float() - target = target.to(device, non_blocking=True).float() + data = data.float().to(device) + target = target.float().to(device) if transform: data = transform(data) @@ -1261,14 +1274,14 @@ def evaluate_regression(model, criterion, data_loader, device, transform, log_su output = model(data) loss = criterion(output, target) # .squeeze() - val_loss += loss.item() + val_loss += loss.detach() mse = get_mse(output, target) # .squeeze() r2 = get_r2_score(output, target) # .squeeze() target_list.append(target) predictions_list.append(output) # FIXME need to take into account that the datasets could have been padded in distributed setup batch_size = data.shape[0] - metric_logger.update(loss=loss.item()) + metric_logger.update(loss=loss.detach()) metric_logger.meters['mse'].update(mse, n=batch_size) metric_logger.meters['r2'].update(r2, n=batch_size) @@ -1283,44 +1296,48 @@ def evaluate_regression(model, criterion, data_loader, device, transform, log_su def train_one_epoch_anomalydetection( model, criterion, optimizer, data_loader, device, epoch, transform, - apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, **kwargs): - logger = getLogger(f"root.train_utils.train.{phase}") + apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, + amp_autocast=None, grad_scaler=None, **kwargs): + import contextlib + amp_ctx = amp_autocast or contextlib.nullcontext() model.train() print_freq = print_freq if print_freq else len(data_loader) metric_logger = MetricLogger(delimiter=" ", phase=phase) - header = f"Training - Epoch[{epoch}]: " + 
header = f"Training - Epoch[{epoch}]:" if transform: transform = transform.to(device) - for _,data, labels in metric_logger.log_every(data_loader, print_freq, header): - # for batch_idx, (data, target) in enumerate(data_loader): + for _, data, labels in metric_logger.log_every(data_loader, print_freq, header): start_time = timeit.default_timer() - data = data.to(device).float() - #In anomlay detection with auto encoder, the target and the input data both are same. + data = data.float().to(device) + # In anomaly detection with autoencoder, the target and the input data are the same target = data.clone() - # apply transform and model on whole batch directly on device - # TODO: If transform is required if transform: data = transform(data) - if dual_op: - output, secondary_output = model(data) # (n,1,8000) -> (n,35) - else: - output = model(data) # (n,1,8000) -> (n,35) - - loss = criterion(output, target) + with amp_ctx: + if dual_op: + output, secondary_output = model(data) + else: + output = model(data) + loss = criterion(output, target) if not is_ptq: - optimizer.zero_grad() - if apex: + optimizer.zero_grad(set_to_none=True) + if grad_scaler is not None: + grad_scaler.scale(loss).backward() + grad_scaler.step(optimizer) + grad_scaler.update() + elif apex: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() + optimizer.step() else: loss.backward() - optimizer.step() + optimizer.step() + + metric_logger.update(loss=loss.detach()) - metric_logger.update(loss=loss.item()) - logger.info(f'{header} MSE {metric_logger.loss.global_avg:.6f}') if model_ema: model_ema.update_parameters(model) @@ -1336,7 +1353,7 @@ def evaluate_anomalydetection( with torch.no_grad(): for _, data, labels in metric_logger.log_every(data_loader, print_freq, header): # for data, target in data_loader: - data = data.to(device, non_blocking=True).float() + data = data.float().to(device) #In anomlay detection with auto encoder, the target and the input data both are same. 
target = data if transform: @@ -1347,69 +1364,64 @@ def evaluate_anomalydetection( else: output = model(data) - loss = criterion(output, target) + loss = criterion(output, target) batch_size = data.shape[0] - metric_logger.update(loss=loss.item()) + metric_logger.update(loss=loss.detach()) metric_logger.synchronize_between_processes() - logger.info(f'{header} MSE {metric_logger.loss.global_avg:.6f}') return metric_logger.loss.global_avg def train_one_epoch_classification( model, criterion, optimizer, data_loader, device, epoch, transform, apex=False, model_ema=None, print_freq=None, phase="", dual_op=True, is_ptq=False, - nn_for_feature_extraction=False, **kwargs): + nn_for_feature_extraction=False, amp_autocast=None, grad_scaler=None, **kwargs): + import contextlib + amp_ctx = amp_autocast or contextlib.nullcontext() model.train() print_freq = print_freq if print_freq else len(data_loader) metric_logger = MetricLogger(delimiter=" ", phase=phase) metric_logger.add_meter("lr", window_size=1, fmt="{value}") metric_logger.add_meter("samples/s", window_size=10, fmt="{value}") - # - # new_sample_rate = 8000 - # transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=new_sample_rate) header = f"Epoch: [{epoch}]" - # TODO: If transform is required if transform: transform = transform.to(device) - # for _, data, target in metric_logger.log_every(data_loader, print_freq, header): for data_raw, data_feat_ext, target in metric_logger.log_every(data_loader, print_freq, header): - # for batch_idx, (data, target) in enumerate(data_loader): - # logger.info(batch_idx) start_time = timeit.default_timer() if nn_for_feature_extraction: - data = data_raw.to(device).float() + data = data_raw.float().to(device) else: - data = data_feat_ext.to(device).float() - target = target.to(device).long() + data = data_feat_ext.float().to(device) + target = target.long().to(device) - # apply transform and model on whole batch directly on device - # TODO: If transform is required 
if transform: data = transform(data) - if dual_op: - output, secondary_output = model(data) # (n,1,8000) -> (n,35) - else: - output = model(data) # (n,1,8000) -> (n,35) - - # negative log-likelihood for a tensor of size (batch x 1 x n_output) - loss = criterion(output, target) + with amp_ctx: + if dual_op: + output, secondary_output = model(data) + else: + output = model(data) + loss = criterion(output, target) if not is_ptq: - optimizer.zero_grad() - if apex: + optimizer.zero_grad(set_to_none=True) + if grad_scaler is not None: + grad_scaler.scale(loss).backward() + grad_scaler.step(optimizer) + grad_scaler.update() + elif apex: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() + optimizer.step() else: loss.backward() - optimizer.step() + optimizer.step() acc1 = accuracy(output, target, topk=(1,)) - # f1_score = get_f1_score(output, target, kwargs.get('num_classes')) batch_size = output.shape[0] - metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"]) - metric_logger.meters['acc1'].update(acc1[0], n=batch_size) + metric_logger.update(loss=loss.detach(), lr=optimizer.param_groups[0]["lr"]) + metric_logger.meters['acc1'].update(acc1[0].detach(), n=batch_size) metric_logger.meters['samples/s'].update(batch_size / (timeit.default_timer() - start_time)) if model_ema: @@ -1422,21 +1434,20 @@ def evaluate_classification(model, criterion, data_loader, device, transform, lo metric_logger = MetricLogger(delimiter=" ", phase=phase) print_freq = print_freq if print_freq else len(data_loader) header = f'Test: {log_suffix}' - confusion_matrix_total = np.zeros((kwargs.get('num_classes'), kwargs.get('num_classes'))) + num_classes = kwargs.get('num_classes') + confusion_matrix_total = np.zeros((num_classes, num_classes)) - target_array = torch.Tensor([]).to(device, non_blocking=True) - predictions_array = torch.Tensor([]).to(device, non_blocking=True) + target_list = [] + predictions_list = [] with torch.no_grad(): - # for _, data, 
target in metric_logger.log_every(data_loader, print_freq, header): - for data_raw, data_feat_ext, target in metric_logger.log_every(data_loader, print_freq, header): - # for data, target in data_loader: + for data_raw, data_feat_ext, target in metric_logger.log_every(data_loader, print_freq, header): if nn_for_feature_extraction: - data = data_raw.to(device, non_blocking=True).float() + data = data_raw.float().to(device) else: - data = data_feat_ext.to(device).float() + data = data_feat_ext.float().to(device) - target = target.to(device, non_blocking=True).long() + target = target.long().to(device) if transform: data = transform(data) @@ -1445,51 +1456,35 @@ def evaluate_classification(model, criterion, data_loader, device, transform, lo else: output = model(data) - target_array = torch.cat((target_array, target)) - predictions_array = torch.cat((predictions_array, output)) + target_list.append(target) + predictions_list.append(output) loss = criterion(output.squeeze(), target) acc1 = accuracy(output.squeeze(), target, topk=(1,)) - f1_score = get_f1_score(output, target, kwargs.get('num_classes')) - confusion_matrix = get_confusion_matrix(output, target, kwargs.get('num_classes')).cpu().numpy() - confusion_matrix_total += confusion_matrix - - # au_roc = get_au_roc(output, target, kwargs.get('num_classes')) # .cpu().numpy() - # au_roc_total += au_roc - # FIXME need to take into account that the datasets could have been padded in distributed setup batch_size = data.shape[0] - metric_logger.update(loss=loss.item()) - metric_logger.meters['acc1'].update(acc1[0], n=batch_size) - metric_logger.meters['f1'].update(f1_score, n=batch_size) - # metric_logger.meters['auroc'].update(au_roc, n=batch_size) - # metric_logger.meters['cm'].update(confusion_matrix, n=batch_size) - # metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) + metric_logger.update(loss=loss.detach()) + metric_logger.meters['acc1'].update(acc1[0].detach(), n=batch_size) # gather the stats from 
all processes metric_logger.synchronize_between_processes() - # logger.info(f'{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f}') + # Concatenate all predictions/targets once (O(n) instead of O(n²) per-batch torch.cat) + target_array = torch.cat(target_list) + predictions_array = torch.cat(predictions_list) + + # Compute all metrics at epoch-end instead of per-batch logger.info(f'{header} Acc@1 {accuracy(predictions_array.squeeze(), target_array, topk=(1,))[0]:.3f}') - logger.info(f'{header} F1-Score {get_f1_score(predictions_array.squeeze(), target_array, kwargs.get("num_classes")):.3f}') - # auc = get_au_roc_from_conf_matrix(confusion_matrix_total) - # logger.info('AU-ROC Score: {:.3f}'.format(auc)) - auc = get_au_roc(predictions_array, target_array, kwargs.get('num_classes')) + f1 = get_f1_score(predictions_array.squeeze(), target_array, num_classes) + logger.info(f'{header} F1-Score {f1:.3f}') + auc = get_au_roc(predictions_array, target_array, num_classes) logger.info("AU-ROC Score: {:.3f}".format(auc)) - logger.info('Confusion Matrix:\n {}'.format(tabulate(pd.DataFrame(get_confusion_matrix( - predictions_array.cpu(), target_array.type(dtype=torch.int64).cpu(), kwargs.get('num_classes')), - columns=[f"Predicted as: {x}" for x in range(kwargs.get('num_classes'))], - index=[f"Ground Truth: {x}" for x in range(kwargs.get('num_classes'))]), headers="keys", tablefmt='grid'))) - - # logger.info(f'{header} AUROC {metric_logger.auroc.global_avg:.3f}') - # logger.info('\n' + '\n'.join([f"Ground Truth:(Class {i}), Predicted:(Class {j}): {int(confusion_matrix_total[i][j])}" for j in range(kwargs.get('num_classes')) for i in range(kwargs.get('num_classes'))])) - - # logger.info('Confusion Matrix:\n {}'.format(tabulate(pd.DataFrame(confusion_matrix_total, - # columns=[f"Predicted as: {x}" for x in range(kwargs.get('num_classes'))], - # index=[f"Ground Truth: {x}" for x in range(kwargs.get('num_classes'))]), - # headers="keys", 
tablefmt='grid'))) + confusion_matrix_total = get_confusion_matrix( + predictions_array.cpu(), target_array.type(dtype=torch.int64).cpu(), num_classes).numpy() + logger.info('Confusion Matrix:\n {}'.format(tabulate(pd.DataFrame(confusion_matrix_total, + columns=[f"Predicted as: {x}" for x in range(num_classes)], + index=[f"Ground Truth: {x}" for x in range(num_classes)]), headers="keys", tablefmt='grid'))) - # logger.info(f'AU-ROC: {au_roc_total}') - return metric_logger.acc1.global_avg, metric_logger.f1.global_avg, auc, confusion_matrix_total, predictions_array, target_array + return metric_logger.acc1.global_avg, f1, auc, confusion_matrix_total, predictions_array, target_array def print_file_level_classification_summary(dataset, predicted, ground_truth,phase): logger_flcs = getLogger(f"root.utils.print_file_level_classification_summary.{phase}") @@ -1724,8 +1719,8 @@ def get_trained_feature_extraction_model(model, args, data_loader, data_loader_t for data_raw, data_fe, _ in data_loader: start_time = timeit.default_timer() - data_raw = data_raw.to(device).float() - data_fe = data_fe.to(device).float() + data_raw = data_raw.float().to(device) + data_fe = data_fe.float().to(device) output = model(data_raw) # (n,1,8000) -> (n,35) @@ -1733,7 +1728,7 @@ def get_trained_feature_extraction_model(model, args, data_loader, data_loader_t loss = criterion(output, data_fe) if not is_ptq: - optimizer.zero_grad() + optimizer.zero_grad(set_to_none=True) loss.backward() optimizer.step() if not is_ptq: @@ -1751,8 +1746,8 @@ def get_trained_feature_extraction_model(model, args, data_loader, data_loader_t with torch.no_grad(): for data_raw, data_fe, _ in data_loader_test: # Assuming the dataset returns (data, target) - data_raw = data_raw.to(device).float() - data_fe = data_fe.to(device).float() + data_raw = data_raw.float().to(device) + data_fe = data_fe.float().to(device) outputs = model(data_raw) # Calculate loss diff --git 
a/tinyml-tinyverse/tinyml_tinyverse/references/common/train_base.py b/tinyml-tinyverse/tinyml_tinyverse/references/common/train_base.py index 78fba7f5..3ed24a40 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/common/train_base.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/common/train_base.py @@ -158,8 +158,11 @@ def get_base_args_parser(description="This script loads time series data and tra parser.add_argument('--gpus', default=1, type=int, help='number of gpus') parser.add_argument('-b', '--batch-size', default=1024, type=int) parser.add_argument('--epochs', default=90, type=int, metavar='N', help='number of total epochs to run') - parser.add_argument('-j', '--workers', default=0 if platform.system() in ['Windows'] else 8, type=int, metavar='N', - help='number of data loading workers (default: 8)') + # macOS uses 'spawn' (not 'fork') for multiprocessing; 4 workers is a + # better default than 8 because the spawn overhead saturates quickly. + _default_workers = 0 if platform.system() == 'Windows' else (4 if platform.system() == 'Darwin' else 8) + parser.add_argument('-j', '--workers', default=_default_workers, type=int, metavar='N', + help=f'number of data loading workers (default: {_default_workers})') parser.add_argument('--opt', default='sgd', type=str, help='optimizer') parser.add_argument('--lr', default=0.1, type=float, help='initial learning rate') parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/image_classification/test_onnx.py b/tinyml-tinyverse/tinyml_tinyverse/references/image_classification/test_onnx.py index 4c50b9f2..73affb40 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/image_classification/test_onnx.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/image_classification/test_onnx.py @@ -141,12 +141,12 @@ def main(gpu, args): input_name = ort_sess.get_inputs()[0].name output_name = ort_sess.get_outputs()[0].name - 
predicted = torch.tensor([]).to(device, non_blocking=True) - ground_truth = torch.tensor([]).to(device, non_blocking=True) + predicted = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + ground_truth = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for batched_raw_data, batched_data, batched_target in data_loader: - batched_raw_data = batched_raw_data.to(device, non_blocking=True).long() - batched_data = batched_data.to(device, non_blocking=True).float() - batched_target = batched_target.to(device, non_blocking=True).long() + batched_raw_data = batched_raw_data.long().to(device, non_blocking=True) + batched_data = batched_data.float().to(device, non_blocking=True) + batched_target = batched_target.long().to(device, non_blocking=True) if transform: batched_data = transform(batched_data) if args.nn_for_feature_extraction: diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx.py index ee7c729e..41a0697a 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx.py @@ -124,11 +124,11 @@ def get_reconstruction_errors_stats(args): logger.info(f"Loading ONNX model: {args.model_path}") ort_sess, input_name, output_name = load_onnx_model(args.model_path, args.generic_model) - errors = torch.tensor([]).to(device, non_blocking=True) + errors = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for _, data, targets in data_loader: - data = data.to(device, non_blocking=True).float() - targets = targets.to(device, non_blocking=True).long() - batch_reconstruction_errors = torch.tensor([]).to(device, non_blocking=True) + data = data.float().to(device, non_blocking=True) + targets = targets.long().to(device, non_blocking=True) + batch_reconstruction_errors = 
torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for input, target_label in zip(data, targets): input = input.unsqueeze(0).cpu().numpy() output = torch.tensor(ort_sess.run([output_name], {input_name: input})[0]).to(device) @@ -172,16 +172,16 @@ def main(gpu, args): logger.info(f"Loading ONNX model: {args.model_path}") ort_sess, input_name, output_name = load_onnx_model(args.model_path, args.generic_model) - errors = torch.tensor([]).to(device, non_blocking=True) - ground_truth = torch.tensor([]).to(device, non_blocking=True) + errors = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + ground_truth = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for _, data, targets in data_loader: - data = data.to(device, non_blocking=True).float() - targets = targets.to(device, non_blocking=True).long() + data = data.float().to(device, non_blocking=True) + targets = targets.long().to(device, non_blocking=True) if transform: data = transform(data) - batch_reconstruction_errors = torch.tensor([]).to(device, non_blocking=True) - batch_target_labels = torch.tensor([]).to(device, non_blocking=True) + batch_reconstruction_errors = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + batch_target_labels = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for input, target_label in zip(data, targets): input = input.unsqueeze(0).cpu().numpy() output = torch.tensor(ort_sess.run([output_name], {input_name: input})[0]).to(device) diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx_cls.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx_cls.py index 1bbd04cb..9d710b2a 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx_cls.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/test_onnx_cls.py @@ -155,12 +155,12 @@ def main(gpu, args): input_name = 
ort_sess.get_inputs()[0].name output_name = ort_sess.get_outputs()[0].name - predicted = torch.tensor([]).to(device, non_blocking=True) - ground_truth = torch.tensor([]).to(device, non_blocking=True) + predicted = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + ground_truth = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for batched_raw_data, batched_data, batched_target in data_loader: - batched_raw_data = batched_raw_data.to(device, non_blocking=True).long() - batched_data = batched_data.to(device, non_blocking=True).float() - batched_target = batched_target.to(device, non_blocking=True).long() + batched_raw_data = batched_raw_data.long().to(device, non_blocking=True) + batched_data = batched_data.float().to(device, non_blocking=True) + batched_target = batched_target.long().to(device, non_blocking=True) if transform: batched_data = transform(batched_data) if args.nn_for_feature_extraction: diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/train.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/train.py index e8ce6170..55363fa9 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/train.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_anomalydetection/train.py @@ -161,11 +161,11 @@ def get_reconstruction_errors_stats(generic_model, model_path, device, data_load input_name = ort_sess.get_inputs()[0].name output_name = ort_sess.get_outputs()[0].name - errors = torch.tensor([]).to(device, non_blocking=True) + errors = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for _, data, targets in data_loader: - data = data.to(device, non_blocking=True).float() - targets = targets.to(device, non_blocking=True).long() - batch_reconstruction_errors = torch.tensor([]).to(device, non_blocking=True) + data = data.float().to(device, non_blocking=True) + targets = targets.long().to(device, non_blocking=True) 
+ batch_reconstruction_errors = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for input, target_label in zip(data, targets): input = input.unsqueeze(0).cpu().numpy() output = torch.tensor(ort_sess.run([output_name], {input_name: input})[0]).to(device) diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/test_onnx.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/test_onnx.py index fb0d8aad..107b1bf4 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/test_onnx.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/test_onnx.py @@ -109,13 +109,13 @@ def main(gpu, args): logger.info(f"Loading ONNX model: {args.model_path}") ort_sess, input_name, output_name = load_onnx_model(args.model_path, args.generic_model) - predicted = torch.tensor([]).to(device, non_blocking=True) - ground_truth = torch.tensor([]).to(device, non_blocking=True) + predicted = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + ground_truth = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for batched_raw_data, batched_data, batched_target in data_loader: - batched_raw_data = batched_raw_data.to(device, non_blocking=True).long() - batched_data = batched_data.to(device, non_blocking=True).float() - batched_target = batched_target.to(device, non_blocking=True).long() + batched_raw_data = batched_raw_data.long().to(device, non_blocking=True) + batched_data = batched_data.float().to(device, non_blocking=True) + batched_target = batched_target.long().to(device, non_blocking=True) if transform: batched_data = transform(batched_data) if args.nn_for_feature_extraction: diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/train.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/train.py index 80cf0e60..1a466287 100644 --- 
a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/train.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_classification/train.py @@ -239,7 +239,7 @@ def main(gpu, args): logger.info("Creating model") if args.load_saved_model == 'None': - if args.nas_enabled == 'True': + if args.nas_enabled: if args.quantization: model = torch.load(os.path.join(os.path.dirname(args.output_dir), os.path.join('base', 'nas_model.pt')), weights_only=False) else: diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_forecasting/test_onnx.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_forecasting/test_onnx.py index d6f0029d..59adc2ae 100644 --- a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_forecasting/test_onnx.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_forecasting/test_onnx.py @@ -104,12 +104,12 @@ def main(gpu, args): logger.info(f"Loading ONNX model: {args.model_path}") ort_sess, input_name, output_name = load_onnx_model(args.model_path, args.generic_model) - predicted = torch.tensor([]).to(device, non_blocking=True) - ground_truth = torch.tensor([]).to(device, non_blocking=True) + predicted = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + ground_truth = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for _, batched_data, batched_target in data_loader_test: - batched_data = batched_data.to(device, non_blocking=True).float() - batched_target = batched_target.to(device, non_blocking=True).float() + batched_data = batched_data.float().to(device, non_blocking=True) + batched_target = batched_target.float().to(device, non_blocking=True) if transform: batched_data = transform(batched_data) for data in batched_data: diff --git a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_regression/test_onnx.py b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_regression/test_onnx.py index c7c77606..3d4bf1e0 100644 --- 
a/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_regression/test_onnx.py +++ b/tinyml-tinyverse/tinyml_tinyverse/references/timeseries_regression/test_onnx.py @@ -101,12 +101,12 @@ def main(gpu, args): logger.info(f"Loading ONNX model: {args.model_path}") ort_sess, input_name, output_name = load_onnx_model(args.model_path, args.generic_model) - predicted = torch.tensor([]).to(device, non_blocking=True) - ground_truth = torch.tensor([]).to(device, non_blocking=True) + predicted = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) + ground_truth = torch.tensor([], dtype=torch.float32).to(device, non_blocking=True) for _, batched_data, batched_target in data_loader: - batched_data = batched_data.to(device, non_blocking=True).float() - batched_target = batched_target.to(device, non_blocking=True).float() + batched_data = batched_data.float().to(device, non_blocking=True) + batched_target = batched_target.float().to(device, non_blocking=True) if transform: batched_data = transform(batched_data) for data in batched_data: