Commit 4327446

committed
Created using Colab
1 parent 7915feb commit 4327446

1 file changed: 173 additions & 122 deletions
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/samaid/pyhpc-tutorial/blob/main/notebooks/9_1_nvmath-python_interop.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"id": "7b236cf1",
"metadata": {
"id": "7b236cf1"
},
"source": [
"# 9.1. `nvmath-python`: Interoperability with CPU and GPU tensor libraries\n",
"The goal of this exercise is to demonstrate how easily `nvmath-python` plugs into existing projects that rely on popular CPU or GPU array libraries, such as NumPy, CuPy, and PyTorch, and how easily a new project can use `nvmath-python` alongside these libraries."
]
},
{
"cell_type": "markdown",
"id": "e38c312d",
"metadata": {
"id": "e38c312d"
},
"source": [
"### Pure CuPy implementation\n",
"\n",
"This example demonstrates basic matrix multiplication of CuPy 2D arrays using `matmul`:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b796dc7e",
"metadata": {
"id": "b796dc7e",
"outputId": "d47076cc-9340-4476-8201-b263dd8a4116",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 460
}
},
"outputs": [
{
"output_type": "error",
"ename": "CUDARuntimeError",
"evalue": "cudaErrorInsufficientDriver: CUDA driver version is insufficient for CUDA runtime version",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mCUDARuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipython-input-4003678963.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# Prepare sample input data for matrix matmul\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m4000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5000\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrand\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.11/dist-packages/cupy/random/_sample.py\u001b[0m in \u001b[0;36mrand\u001b[0;34m(*size, **kwarg)\u001b[0m\n\u001b[1;32m 42\u001b[0m raise TypeError('rand() got unexpected keyword arguments %s'\n\u001b[1;32m 43\u001b[0m % ', '.join(kwarg.keys()))\n\u001b[0;32m---> 44\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrandom_sample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 45\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.11/dist-packages/cupy/random/_sample.py\u001b[0m in \u001b[0;36mrandom_sample\u001b[0;34m(size, dtype)\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 154\u001b[0m \"\"\"\n\u001b[0;32m--> 155\u001b[0;31m \u001b[0mrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_generator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_random_state\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 156\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mrs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom_sample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 157\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.11/dist-packages/cupy/random/_generator.py\u001b[0m in \u001b[0;36mget_random_state\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1304\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1305\u001b[0m \"\"\"\n\u001b[0;32m-> 1306\u001b[0;31m \u001b[0mdev\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDevice\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1307\u001b[0m \u001b[0mrs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_random_states\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdev\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1308\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrs\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mcupy/cuda/device.pyx\u001b[0m in \u001b[0;36mcupy.cuda.device.Device.__init__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mcupy_backends/cuda/api/runtime.pyx\u001b[0m in \u001b[0;36mcupy_backends.cuda.api.runtime.getDevice\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mcupy_backends/cuda/api/runtime.pyx\u001b[0m in \u001b[0;36mcupy_backends.cuda.api.runtime.check_status\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mCUDARuntimeError\u001b[0m: cudaErrorInsufficientDriver: CUDA driver version is insufficient for CUDA runtime version"
]
}
],
"source": [
"import cupy as cp\n",
"\n",
"# Prepare sample input data for the matrix multiplication\n",
"n, m, k = 2000, 4000, 5000\n",
"a = cp.random.rand(n, k)\n",
"b = cp.random.rand(k, m)\n",
"\n",
"# Perform matrix multiplication\n",
"result = cp.matmul(a, b)\n",
"\n",
"# Print the result\n",
"print(result)\n",
"\n",
"# Print the CUDA device for each array\n",
"print(a.device)\n",
"print(b.device)\n",
"print(result.device)"
]
},
{
"cell_type": "markdown",
"id": "7528a6f8",
"metadata": {
"id": "7528a6f8"
},
"source": [
"### Using `nvmath-python` alongside CuPy\n",
"\n",
"This is a slight modification of the example above, where the matrix multiplication is done with the corresponding `nvmath-python` implementation.\n",
"\n",
"Note that `nvmath-python` supports multiple frameworks, including CuPy. It uses the framework's memory pool and current stream for seamless integration. The result of each operation is a tensor of the same framework as the inputs, located on the same device as the inputs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "311ee2e9",
"metadata": {
"id": "311ee2e9"
},
"outputs": [],
"source": [
"# The same matrix multiplication as in the previous example, but using nvmath-python\n",
"import nvmath\n",
"\n",
"# Perform matrix multiplication\n",
"result = nvmath.linalg.advanced.matmul(a, b)\n",
"\n",
"# Print the result\n",
"print(result)\n",
"\n",
"# Print the CUDA device for each array\n",
"print(a.device)\n",
"print(b.device)\n",
"print(result.device)\n"
]
},
{
"cell_type": "markdown",
"id": "85b2ae1b",
"metadata": {
"id": "85b2ae1b"
},
"source": [
"As we can see, the code looks essentially the same. If you measure the performance of the two implementations, it will be nearly identical.\n",
"\n",
"This is because CuPy and `nvmath-python` (as well as PyTorch) all use the CUDA-X Math Libraries as their engine. Which library to choose for this matrix multiplication problem is up to the user.\n",
"\n",
"The next examples demonstrate cases where `nvmath-python` may become essential for reaching peak performance."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf34d34d",
"metadata": {
"id": "bf34d34d"
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "nersc-nvmath",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.5"
},
"colab": {
"provenance": [],
"include_colab_link": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}
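The notebook's two code cells differ only in the call that performs the multiplication. For readers without a working CUDA driver (the recorded output above shows a `cudaErrorInsufficientDriver` failure in Colab), the same pattern can be sketched on the CPU with NumPy as a stand-in; this is not the notebook's GPU code, only an illustration of the array shapes and the drop-in nature of the call. On a GPU machine, `cp.random.rand` and `nvmath.linalg.advanced.matmul(a, b)` slot into the same places:

```python
import numpy as np

# CPU stand-in for the notebook's GPU example (smaller sizes for speed).
# On a GPU machine, cp.random.rand / nvmath.linalg.advanced.matmul
# are the drop-in replacements, per the cells above.
n, m, k = 200, 400, 500
a = np.random.rand(n, k)
b = np.random.rand(k, m)

# Multiplying an (n, k) array by a (k, m) array yields an (n, m) array,
# exactly as in the CuPy and nvmath-python cells.
result = a @ b
print(result.shape)  # (200, 400)
```

The sizes here are deliberately smaller than the notebook's 2000/4000/5000 so the sketch runs instantly on a CPU.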

0 commit comments
