Add RS demo notebook

seofernando25 · seofernando25 · commit f5396ed46a5a · 2025-11-28T02:38:20.000-05:00
diff --git a/notebooks/rs_training.ipynb b/notebooks/rs_training.ipynb
@@ -0,0 +1,305 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "d90a756a",
+      "metadata": {},
+      "source": [
+        "# Model Training Demo"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "id": "63bd092a",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Imports\n",
+        "\n",
+        "from typing import Literal\n",
+        "import os\n",
+        "import sys\n",
+        "import numpy as np\n",
+        "\n",
+        "sys.path.append(os.path.join(os.path.curdir, \"..\"))\n",
+        "\n",
+        "from search.random_search import RandomSearch\n",
+        "from scripts.run_experiment import prepare_dataset\n",
+        "from models.cnn import CNNModel, TrainingConfig\n",
+        "from models.factory import get_model_by_name\n",
+        "from models.decision_tree import DecisionTreeModel\n",
+        "from models.knn import KNNModel\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "id": "70994585",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "CIFAR-10 sample image count: 100\n",
+            "CIFAR-10 sample label shape: (100,)\n",
+            "Individual image shape: (32, 32)\n",
+            "CIFAR-10 pixel value ranges: [0.0, 1.0]\n",
+            "Image data type: float32\n",
+            "Label data type: int64\n",
+            "Sample datasets loaded successfully!\n",
+            "CIFAR-10 sample: 100 train, 50 validation\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Load the prepared CIFAR-10 data and draw a small, reproducible sample\n",
+        "\n",
+        "cifar10_data = prepare_dataset()\n",
+        "\n",
+        "# Demo-sized subsets: 100 training items, 50 validation items\n",
+        "SAMPLE_SIZE = 100\n",
+        "VAL_SAMPLE_SIZE = 50\n",
+        "\n",
+        "# Seed the global NumPy RNG once so both index draws below are repeatable\n",
+        "np.random.seed(42)\n",
+        "train_indices = np.random.choice(\n",
+        "    len(cifar10_data['train_images']), size=SAMPLE_SIZE, replace=False\n",
+        ")\n",
+        "val_indices = np.random.choice(\n",
+        "    len(cifar10_data['val_images']), size=VAL_SAMPLE_SIZE, replace=False\n",
+        ")\n",
+        "\n",
+        "# One label array per split, shared by the image-list and flattened views\n",
+        "y_train = cifar10_data['train_labels'][train_indices]\n",
+        "y_test = cifar10_data['val_labels'][val_indices]\n",
+        "\n",
+        "# Per-image arrays (list[np.ndarray]) -- the form passed to the CNN\n",
+        "X_train: list[np.ndarray] = [cifar10_data['train_images'][idx] for idx in train_indices]\n",
+        "X_test: list[np.ndarray] = [cifar10_data['val_images'][idx] for idx in val_indices]\n",
+        "\n",
+        "# Flattened arrays -- the form passed to the sklearn-style models\n",
+        "X_train_flat: np.ndarray = cifar10_data['train_flat'][train_indices]\n",
+        "X_test_flat: np.ndarray = cifar10_data['val_flat'][val_indices]\n",
+        "\n",
+        "# Report the sample's size and per-image shape\n",
+        "print(f\"CIFAR-10 sample image count: {len(X_train)}\")\n",
+        "print(f\"CIFAR-10 sample label shape: {y_train.shape}\")\n",
+        "print(f\"Individual image shape: {X_train[0].shape}\")\n",
+        "\n",
+        "# Global pixel range across the sampled training images\n",
+        "pixel_min = np.min([img.min() for img in X_train])\n",
+        "pixel_max = np.max([img.max() for img in X_train])\n",
+        "print(f\"CIFAR-10 pixel value ranges: [{pixel_min}, {pixel_max}]\")\n",
+        "\n",
+        "# Element types of images and labels\n",
+        "print(f\"Image data type: {X_train[0].dtype}\")\n",
+        "print(f\"Label data type: {y_train.dtype}\")\n",
+        "\n",
+        "print(\"Sample datasets loaded successfully!\")\n",
+        "print(f\"CIFAR-10 sample: {len(X_train)} train, {len(X_test)} validation\")\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "22d798a6",
+      "metadata": {},
+      "source": [
+        "## Hyperparameter Search Test"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "id": "367be266",
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def quick_hyperparameter_test(\n",
+        "    model_keys: list[Literal['dt', 'knn', 'cnn']],\n",
+        "    X_train: list[np.ndarray],\n",
+        "    y_train: np.ndarray,\n",
+        "    X_test: list[np.ndarray],\n",
+        "    y_test: np.ndarray,\n",
+        "    X_train_flat: np.ndarray,\n",
+        "    X_test_flat: np.ndarray,\n",
+        "    dataset_name: str = \"Dataset\",\n",
+        "    trials: int = 5,\n",
+        "    cnn_epochs: int = 5,\n",
+        ") -> dict[str, dict]:\n",
+        "    \"\"\"Run a short RandomSearch for each requested model and print a summary.\n",
+        "\n",
+        "    For every key in model_keys the parameter space comes from\n",
+        "    get_model_by_name(key).get_param_space() and is searched with RandomSearch\n",
+        "    (seed=42, n_jobs=1) on the 'accuracy' metric. Each trial builds a fresh\n",
+        "    model via get_model_by_name, trains it and returns its evaluate() result.\n",
+        "\n",
+        "    Args:\n",
+        "        model_keys: Keys of the models to search, in order.\n",
+        "        X_train: Per-image training arrays; used only when the key is 'cnn'.\n",
+        "        y_train: Training labels, used for every model.\n",
+        "        X_test: Per-image held-out arrays; used only when the key is 'cnn'.\n",
+        "        y_test: Held-out labels, used for every model.\n",
+        "        X_train_flat: Flattened training features for every non-'cnn' key.\n",
+        "        X_test_flat: Flattened held-out features for every non-'cnn' key.\n",
+        "        dataset_name: Label shown in the progress output only.\n",
+        "        trials: Number of trials passed to RandomSearch.run per model.\n",
+        "        cnn_epochs: Epoch count given to TrainingConfig for each CNN trial.\n",
+        "\n",
+        "    Returns:\n",
+        "        Dict keyed by model display name. Each value holds 'best_params',\n",
+        "        'best_score' (best 'accuracy', 0.0 when that metric is missing),\n",
+        "        'metrics', 'trials' and 'history' taken from the search result.\n",
+        "    \"\"\"\n",
+        "    # Display names for the progress output; unknown keys fall back to the key itself\n",
+        "    model_key_to_name = {\n",
+        "        \"dt\": \"Decision Tree\",\n",
+        "        \"knn\": \"K-Nearest Neighbors\",\n",
+        "        \"cnn\": \"Convolutional Neural Network\",\n",
+        "    }\n",
+        "    # Sampled parameters that go to TrainingConfig instead of CNNModel.create_model\n",
+        "    training_param_names = frozenset(\n",
+        "        ('batch_size', 'learning_rate', 'optimizer', 'weight_decay')\n",
+        "    )\n",
+        "    print(f\"Starting quick hyperparameter test on {dataset_name}\")\n",
+        "    print(\"=\" * 60)\n",
+        "    print(f\"Using RandomSearch with {trials} trials per model\")\n",
+        "    results = {}\n",
+        "    for model_key in model_keys:\n",
+        "        model_name = model_key_to_name.get(model_key, model_key)\n",
+        "        print(f\"\\nTesting {model_name}...\")\n",
+        "        # A throwaway instance is enough to read the parameter space\n",
+        "        param_space = get_model_by_name(model_key).get_param_space()\n",
+        "\n",
+        "        # evaluate_params closes over model_key; that is safe because the\n",
+        "        # search below runs to completion inside this same loop iteration.\n",
+        "        def evaluate_params(params):\n",
+        "            # Fresh model per trial so no fitted state carries over\n",
+        "            model_instance = get_model_by_name(model_key)\n",
+        "            if model_key == \"cnn\":\n",
+        "                assert isinstance(model_instance, CNNModel)\n",
+        "                # Split the sampled params: architecture vs. training configuration\n",
+        "                cnn_params = {\n",
+        "                    name: value for name, value in params.items()\n",
+        "                    if name not in training_param_names\n",
+        "                }\n",
+        "                training_config_params = {\n",
+        "                    name: value for name, value in params.items()\n",
+        "                    if name in training_param_names\n",
+        "                }\n",
+        "                model_instance.create_model(**cnn_params)\n",
+        "                config = TrainingConfig(epochs=cnn_epochs, **training_config_params)\n",
+        "                # NOTE(review): the held-out split is passed to train() (presumably as\n",
+        "                # validation data) and is also the split scored below -- confirm that\n",
+        "                # train() does not select a checkpoint on it, or the score is optimistic.\n",
+        "                model_instance.train(X_train, y_train, X_test, y_test, config=config, verbose=False)\n",
+        "                return model_instance.evaluate(X_test, y_test)\n",
+        "            # Every other key is handled as an sklearn-style model on flattened features\n",
+        "            assert isinstance(model_instance, (DecisionTreeModel, KNNModel))\n",
+        "            model_instance.create_model(**params)\n",
+        "            model_instance.train(X_train_flat, y_train)\n",
+        "            return model_instance.evaluate(X_test_flat, y_test)\n",
+        "\n",
+        "        # Sequential, seeded search so repeated runs sample the same parameters\n",
+        "        random_search = RandomSearch(\n",
+        "            param_space=param_space,\n",
+        "            evaluate_fn=evaluate_params,\n",
+        "            metric_key=\"accuracy\",\n",
+        "            seed=42,  # For reproducibility\n",
+        "            n_jobs=1   # Sequential execution\n",
+        "        )\n",
+        "        search_result = random_search.run(trials=trials, verbose=True)\n",
+        "        best_score = search_result.best_metrics.get(\"accuracy\", 0.0)\n",
+        "        results[model_name] = {\n",
+        "            \"best_params\": search_result.best_params,\n",
+        "            \"best_score\": best_score,\n",
+        "            \"metrics\": search_result.best_metrics,\n",
+        "            \"trials\": search_result.trials,\n",
+        "            \"history\": search_result.history\n",
+        "        }\n",
+        "        print(f\"Best params: {search_result.best_params}\")\n",
+        "        print(f\"Best score: {best_score:.4f}\")\n",
+        "    print(\"\\n\" + \"=\" * 60)\n",
+        "    print(\"Quick Hyperparameter Test Summary:\")\n",
+        "    for model_name, result in results.items():\n",
+        "        # 'best_score' is always set above, so index it directly\n",
+        "        score = result[\"best_score\"]\n",
+        "        print(f\"{model_name}: Best Score = {score:.4f}\")\n",
+        "    return results\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "id": "72b263d8",
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "\n",
+            "============================================================\n",
+            "Testing all models on CIFAR-10 sample...\n",
+            "Starting quick hyperparameter test on CIFAR-10\n",
+            "============================================================\n",
+            "Using RandomSearch with 5 trials per model\n",
+            "\n",
+            "Testing Decision Tree...\n",
+            "Running 5 trials...\n",
+            "Optimizing for metric: accuracy\n",
+            "Trial 1/5: {'max_depth': 6, 'min_samples_split': 2, 'min_samples_leaf': 5, 'criterion': 'gini'}\n",
+            "Trial 2/5: {'max_depth': 10, 'min_samples_split': 6, 'min_samples_leaf': 2, 'criterion': 'gini'}\n",
+            "Trial 3/5: {'max_depth': 16, 'min_samples_split': 3, 'min_samples_leaf': 1, 'criterion': 'gini'}\n",
+            "Trial 4/5: {'max_depth': 9, 'min_samples_split': 9, 'min_samples_leaf': 9, 'criterion': 'gini'}\n",
+            "Trial 5/5: {'max_depth': 20, 'min_samples_split': 8, 'min_samples_leaf': 9, 'criterion': 'entropy'}\n",
+            "  -> New best! accuracy=0.2600\n",
+            "Best params: {'max_depth': 6, 'min_samples_split': 2, 'min_samples_leaf': 5, 'criterion': 'gini'}\n",
+            "Best score: 0.2600\n",
+            "\n",
+            "Testing K-Nearest Neighbors...\n",
+            "Running 5 trials...\n",
+            "Optimizing for metric: accuracy\n",
+            "Trial 1/5: {'n_neighbors': 23, 'weights': 'uniform', 'metric': 'minkowski'}\n",
+            "Trial 2/5: {'n_neighbors': 26, 'weights': 'distance', 'metric': 'manhattan'}\n",
+            "Trial 3/5: {'n_neighbors': 10, 'weights': 'uniform', 'metric': 'minkowski'}\n",
+            "Trial 4/5: {'n_neighbors': 24, 'weights': 'uniform', 'metric': 'chebyshev'}\n",
+            "Trial 5/5: {'n_neighbors': 4, 'weights': 'uniform', 'metric': 'minkowski'}\n",
+            "  -> New best! accuracy=0.0800\n",
+            "  -> New best! accuracy=0.1200\n",
+            "Best params: {'n_neighbors': 10, 'weights': 'uniform', 'metric': 'minkowski'}\n",
+            "Best score: 0.1200\n",
+            "\n",
+            "Testing Convolutional Neural Network...\n",
+            "Running 5 trials...\n",
+            "Optimizing for metric: accuracy\n",
+            "Trial 1/5: {'kernel_size': 5, 'stride': 1, 'learning_rate': 1.188590529831906e-05, 'batch_size': 64, 'weight_decay': 0.002448918538034762, 'optimizer': 'AdamW'}\n",
+            "Trial 2/5: {'kernel_size': 5, 'stride': 1, 'learning_rate': 0.0010717622652265692, 'batch_size': 16, 'weight_decay': 0.005904925124490396, 'optimizer': 'AdamW'}\n",
+            "Trial 3/5: {'kernel_size': 3, 'stride': 1, 'learning_rate': 4.5280782614269235e-05, 'batch_size': 16, 'weight_decay': 0.00561245062938613, 'optimizer': 'SGD'}\n",
+            "Trial 4/5: {'kernel_size': 3, 'stride': 2, 'learning_rate': 0.0005858643226824373, 'batch_size': 16, 'weight_decay': 0.007588073671297673, 'optimizer': 'AdamW'}\n",
+            "Trial 5/5: {'kernel_size': 5, 'stride': 2, 'learning_rate': 0.00010489421799219316, 'batch_size': 32, 'weight_decay': 0.002153137621075888, 'optimizer': 'SGD'}\n",
+            "  -> New best! accuracy=0.0800\n",
+            "  -> New best! accuracy=0.1000\n",
+            "Best params: {'kernel_size': 5, 'stride': 2, 'learning_rate': 0.00010489421799219316, 'batch_size': 32, 'weight_decay': 0.002153137621075888, 'optimizer': 'SGD'}\n",
+            "Best score: 0.1000\n",
+            "\n",
+            "============================================================\n",
+            "Quick Hyperparameter Test Summary:\n",
+            "Decision Tree: Best Score = 0.2600\n",
+            "K-Nearest Neighbors: Best Score = 0.1200\n",
+            "Convolutional Neural Network: Best Score = 0.1000\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Run the quick search for every model on the CIFAR-10 sample\n",
+        "print(\"\\n\" + \"=\" * 60)\n",
+        "print(\"Testing all models on CIFAR-10 sample...\")\n",
+        "\n",
+        "# Keys of the models to search, in the order they are run\n",
+        "model_keys: list[Literal['dt', 'knn', 'cnn']] = [\"dt\", \"knn\", \"cnn\"]\n",
+        "\n",
+        "# Keyword arguments keep the image-list and flattened inputs from being mixed up\n",
+        "cifar_results = quick_hyperparameter_test(\n",
+        "    model_keys=model_keys,\n",
+        "    X_train=X_train,\n",
+        "    y_train=y_train,\n",
+        "    X_test=X_test,\n",
+        "    y_test=y_test,\n",
+        "    X_train_flat=X_train_flat,\n",
+        "    X_test_flat=X_test_flat,\n",
+        "    dataset_name=\"CIFAR-10\",\n",
+        "    trials=5,\n",
+        ")\n",
+        "\n"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": ".venv",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.13.7"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}