|
221 | 221 | " factor = np.random.uniform(factor_range[0], factor_range[1])\n", |
222 | 222 | " return signal * factor" |
223 | 223 | ] |
| 224 | + }, |
| 225 | + { |
| 226 | + "cell_type": "markdown", |
| 227 | + "metadata": {}, |
| 228 | + "source": [ |
| 229 | + "## **Smart Data Generator**" |
| 230 | + ] |
| 231 | + }, |
| 232 | + { |
| 233 | + "cell_type": "code", |
| 234 | + "execution_count": 6, |
| 235 | + "metadata": {}, |
| 236 | + "outputs": [ |
| 237 | + { |
| 238 | + "name": "stdout", |
| 239 | + "output_type": "stream", |
| 240 | + "text": [ |
| 241 | + "Batch data shape: (32, 187, 1)\n", |
| 242 | + "Batch labels shape: (32, 5)\n" |
| 243 | + ] |
| 244 | + } |
| 245 | + ], |
| 246 | + "source": [ |
# 7. Create a Python Generator for training
def rhythm_attention_generator(X, y, batch_size, augment=True, probabilities=None):
    """
    Yield class-balanced training batches indefinitely.

    Each batch is drawn by weighted random sampling (with replacement) so
    minority classes appear more often. Online augmentation is applied per
    batch: minority-class samples (label != 0) always get Gaussian noise
    followed by amplitude scaling, while majority-class samples get very
    light noise roughly 30% of the time for robustness.

    Args:
        X: sample array indexable by a 1-D integer index array
           (e.g. np.ndarray of shape (num_samples, timesteps, channels)).
        y: integer class labels aligned with X; 0 is the majority class.
        batch_size: number of samples drawn per batch.
        augment: apply online augmentation when True.
        probabilities: optional per-sample sampling weights (must sum to 1).
            Defaults to the notebook-global ``sample_probabilities``,
            preserving the original behavior; passing it explicitly removes
            the hidden global dependency and makes the function reusable.

    Yields:
        (X_batch, y_batch_onehot): the (possibly augmented) signals and the
        labels one-hot encoded for a 5-class softmax output.
    """
    # Backward-compatible fallback to the module-level weights computed
    # earlier in the notebook.
    p = sample_probabilities if probabilities is None else probabilities
    indices = np.arange(len(X))

    while True:
        # Weighted random sampling balances the class mix of every batch.
        batch_indices = np.random.choice(indices, size=batch_size, p=p)

        # Copy so augmentation never mutates the underlying dataset.
        X_batch = X[batch_indices].copy()
        y_batch = y[batch_indices]

        if augment:
            for i in range(batch_size):
                if y_batch[i] != 0:
                    # Heavier augmentation for minority classes (1, 2, 3, 4).
                    X_batch[i] = apply_amplitude_scaling(add_gaussian_noise(X_batch[i]))
                elif np.random.rand() > 0.7:
                    # Very light noise on the majority class to improve robustness.
                    X_batch[i] = add_gaussian_noise(X_batch[i], noise_level=0.002)

        # Convert labels to one-hot encoding for the softmax output.
        yield X_batch, tf.keras.utils.to_categorical(y_batch, num_classes=5)
# 8. Build the generators used during training.
# The training stream is weighted-sampled and augmented on the fly.
train_gen = rhythm_attention_generator(X_train, y_train, batch_size=32, augment=True)

# Evaluation data must stay untouched: no weighting, no augmentation.
# A single one-hot conversion of the test labels is enough.
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=5)

# Sanity check: pull one batch from the generator and inspect its dimensions.
x_batch, y_batch = next(train_gen)
print(f"Batch data shape: {x_batch.shape}")
print(f"Batch labels shape: {y_batch.shape}")
| 289 | + ] |
224 | 290 | } |
225 | 291 | ], |
226 | 292 | "metadata": { |
|
0 commit comments