diff --git a/CMakeLists.txt b/CMakeLists.txt index 92284b4..ffdac7d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ # # This project contains: # - common/ : Host-DPU control channel library (CMake) -# - dpu-agent/ : BlueField DPU proxy service (CMake) +# - blue-cache/ : BlueField DPU proxy service (CMake) # - examples/cpp/ : NIXL C++ example (CMake) # - examples/standalone: Standalone host test tool (CMake) # - nixl-plugin/ : NIXL backend plugin source. It is NOT built here directly; @@ -29,7 +29,7 @@ option(BUILD_EXAMPLES "Build host-side examples (C++ NIXL example + standalone add_subdirectory(common) if(BUILD_DPU_AGENT) - add_subdirectory(dpu-agent) + add_subdirectory(blue-cache) endif() if(BUILD_EXAMPLES) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5d1e337..db134f1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,13 +5,13 @@ Thank you for your interest in BlueCache. This document describes how to build, ## Repository Structure - `common/` — Shared host-DPU wire protocol (`dma_transfer.h`). -- `nixl-plugin/` — NIXL `DOCA_DMA_PROXY` backend plugin source. -- `dpu-agent/` — BlueField DPU proxy service. +- `nixl-plugin/` — NIXL `BLUE_CACHE` backend plugin source. +- `blue-cache/` — BlueField DPU proxy service. - `examples/` — Standalone C++ example and LMCache reference architecture. - `scripts/` — `patch_nixl.sh`, `build_all.sh`, and helpers. - `docs/` — Architecture and integration documentation. -## Building the DPU Agent +## Building blue-cache Requirements: @@ -25,7 +25,7 @@ mkdir build && cd build export DOCA_DIR=/opt/mellanox/doca export NIXL_ROOT=/opt/nvidia/nvda_nixl cmake .. -DBUILD_EXAMPLES=OFF -make -j$(nproc) dpu_dma_copy +make -j$(nproc) blue-cache ``` ## Patching NIXL with the Plugin @@ -36,7 +36,7 @@ The plugin is designed to be injected into a NIXL source tree: ./scripts/patch_nixl.sh /path/to/nixl/source cd /path/to/nixl/source -meson setup build -Denable_plugins=DOCA_DMA_PROXY +meson setup build -Denable_plugins=BLUE_CACHE ninja -C build ``` @@ -59,11 +59,11 @@ cd examples/standalone ./scripts/build_host.sh # On DPU -./build-dpu/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 +./build-dpu/blue-cache -p 0000:03:00.0 -m 256 -q 4 # On Host -./build-host/gpu_dma_copy -o push -p 0000:ba:00.0 -g 0 -f /tmp/test.bin -s 64 -./build-host/gpu_dma_copy -o pull -p 0000:ba:00.0 -g 0 -f /tmp/test.bin -O /tmp/test.out +./build-host/blue-cache-host -o push -p 0000:ba:00.0 -g 0 -f /tmp/test.bin -s 64 +./build-host/blue-cache-host -o pull -p 0000:ba:00.0 -g 0 -f /tmp/test.bin -O /tmp/test.out ``` ### NIXL C++ example @@ -71,7 +71,7 @@ cd examples/standalone Build with `-DBUILD_EXAMPLES=ON` and run after the DPU agent is started: ```bash -./build/examples/cpp/nixl_doca_dma_proxy_example 0000:ba:00.0 /tmp/dpu_object.bin +./build/examples/cpp/nixl_blue_cache_example 0000:ba:00.0 /tmp/dpu_object.bin ``` ## Upstreaming to NIXL @@ -82,10 +82,10 @@ When the plugin is ready for upstream NIXL: 2. Run `./scripts/patch_nixl.sh` against a clean NIXL checkout. 3. Review the diff in the NIXL tree. 4. Create a NIXL PR containing: - - Plugin source under `src/plugins/doca_dma_proxy/` + - Plugin source under `src/plugins/blue_cache/` - Build integration in `meson.build` and `src/plugins/meson.build` - Static plugin registration in `src/core/nixl_plugin_manager.cpp` - - Tests under `test/unit/plugins/doca_dma_proxy/` (when available) + - Tests under `test/unit/plugins/blue_cache/` (when available) - Documentation updates ## Code Style diff --git a/README.md b/README.md index 5da344c..557363e 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ A complete GPU KV-cache offload solution that moves KV tensors from Host GPU mem This project provides an end-to-end pipeline for offloading GPU-resident data — primarily LLM KV caches — to storage attached to a local BlueField DPU. It is built from three integrated pieces: -1. **DPU Agent (`dpu-agent/`)** — Runs on the BlueField DPU ARM cores. It imports the remote GPU memory map, executes DOCA DMA operations, and writes incoming data to DPU-side storage backends. -2. **NIXL Plugin (`nixl-plugin/`)** — A host-side NIXL backend named `DOCA_DMA_PROXY`. It registers GPU buffers as `VRAM_SEG`, exports them over PCIe with DOCA DMA, and forwards transfer requests to the DPU agent. -3. **LMCache Integration (`examples/lmcache/`)** — A patch set and configuration example that enables LMCache v0.4.3 to use the `DOCA_DMA_PROXY` backend for transparent KV-cache tiering. +1. **blue-cache (`blue-cache/`)** — The DPU-side agent. It runs on the BlueField DPU ARM cores, imports the remote GPU memory map, executes DOCA DMA operations, and writes incoming data to DPU-side storage backends. +2. **NIXL Plugin (`nixl-plugin/`)** — A host-side NIXL backend named `BLUE_CACHE`. It registers GPU buffers as `VRAM_SEG`, exports them over PCIe with DOCA DMA, and forwards transfer requests to the DPU agent. +3. **LMCache Integration (`examples/lmcache/`)** — A patch set and configuration example that enables LMCache v0.4.3 to use the `BLUE_CACHE` backend for transparent KV-cache tiering. Together these components let an application such as LMCache express a transfer as `VRAM_SEG ↔ OBJ_SEG` and have the actual PCIe DMA and storage I/O executed by the DPU. @@ -45,7 +45,7 @@ By using the BlueField DPU's dedicated DOCA DMA engine, this solution: │ Host │ │ ┌─────────────────────┐ ┌─────────────────────────────┐ │ │ │ LMCache / vLLM │ │ NIXL Agent │ │ -│ │ (KV-cache manager) │───►│ + DOCA_DMA_PROXY backend │ │ +│ │ (KV-cache manager) │───►│ + BLUE_CACHE backend │ │ │ └─────────────────────┘ │ - registers GPU VRAM │ │ │ │ - exports GPU mmap │ │ │ │ - sends transfer requests │ │ @@ -62,22 +62,25 @@ By using the BlueField DPU's dedicated DOCA DMA engine, this solution: ┌─────────────────────────────────────────────────────────────────────────────────────────┐ │ BlueField DPU │ │ ┌─────────────────────────────────────────────────────────────────────────────────┐ │ -│ │ dpu_dma_copy agent │ │ +│ │ blue-cache agent │ │ │ │ ┌───────────────┐ ┌───────────────┐ ┌─────────────────────────────────┐ │ │ │ │ │ DOCA DMA │───►│ staging buffer│───►│ NIXL storage backend │ │ │ │ │ │ engine │ │ (DPU DRAM) │ │ (posix / xdfs / xdfs_kv / ...) │ │ │ │ │ └───────────────┘ └───────────────┘ └─────────────────────────────────┘ │ │ │ │ │ │ │ │ │ ▼ │ │ -│ │ ┌───────────────┐ │ │ -│ │ │ DPU-local │ │ │ -│ │ │ NVMe / OBJ │ │ │ -│ │ └───────────────┘ │ │ +│ │ ┌──────────────┴──────────────┐ │ │ +│ │ │ │ │ │ +│ │ ▼ ▼ │ │ +│ │ ┌─────────────┐ ┌─────────────────┐ │ │ +│ │ │ DPU-local │ │ Remote Storage │ │ │ +│ │ │ (posix) │ │ xdfs / xdfs_kv │ │ │ +│ │ └─────────────┘ └─────────────────┘ │ │ │ └─────────────────────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────────────────────────┘ ``` -### DPU Agent +### blue-cache The DPU agent is the piece that executes the offload. It runs as a service on the BlueField DPU and is intentionally separate from the NIXL library so it can evolve independently. @@ -88,7 +91,7 @@ Responsibilities: - Execute chunked, pipelined DOCA DMA with configurable queue depth. - Forward received data to a NIXL storage backend running on the DPU, which in turn writes to local files or object storage. -Build and run instructions are in [`dpu-agent/README.md`](dpu-agent/README.md). +Build and run instructions are in [`blue-cache/README.md`](blue-cache/README.md). ### NIXL Plugin @@ -105,7 +108,7 @@ Because NIXL loads backends dynamically, the plugin source is injected into a NI [`examples/lmcache/`](examples/lmcache/) contains: -- `lmcache_integration.patch` — modifications to LMCache v0.4.3 to recognize and use the `DOCA_DMA_PROXY` backend. +- `lmcache_integration.patch` — modifications to LMCache v0.4.3 to recognize and use the `BLUE_CACHE` backend. - `lmcache-config.yaml` — sample configuration. - `patch_lmcache.sh` — helper that applies the patch idempotently. @@ -117,7 +120,7 @@ After patching LMCache, you can configure a storage backend that points to the D . ├── common/ # Shared host-DPU control channel + wire protocol (dma_transfer.h) ├── nixl-plugin/ # NIXL backend plugin source (patch into NIXL) -├── dpu-agent/ # BlueField DPU proxy service +├── blue-cache/ # BlueField DPU proxy service ├── examples/ │ ├── cpp/ # NIXL C++ example │ ├── python/ # NIXL Python example @@ -132,23 +135,21 @@ After patching LMCache, you can configure a storage backend that points to the D ## Quick Start -### 1. Build the DPU Agent +### 1. Build blue-cache On the BlueField DPU: ```bash -export DOCA_DIR=/opt/mellanox/doca -export NIXL_ROOT=/opt/nvidia/nvda_nixl mkdir -p build && cd build cmake .. -DBUILD_EXAMPLES=OFF -make -j$(nproc) dpu_dma_copy +make -j$(nproc) blue-cache ``` Run the agent (TCP fallback mode for the easiest first test): ```bash -./dpu-agent/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 -b posix -T +./blue-cache/blue-cache -p 0000:03:00.0 -m 256 -q 4 -b posix -T ``` Omit `-T` to use DOCA Comch mode. @@ -161,7 +162,7 @@ On the host where NIXL is built: ./scripts/patch_nixl.sh /path/to/nixl/source cd /path/to/nixl/source -meson setup build -Denable_plugins=DOCA_DMA_PROXY +meson setup build -Denable_plugins=BLUE_CACHE ninja -C build ``` @@ -172,7 +173,7 @@ The patch script is idempotent; running it multiple times is safe. ```bash export NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/plugins -python3 examples/python/nixl_doca_dma_proxy_example.py \ +python3 examples/python/nixl_blue_cache_example.py \ -o push \ -p 0000:ba:00.0 \ -g 0 \ @@ -192,7 +193,7 @@ This project has been verified against **NIXL v1.1.0**. Other NIXL versions may - [`docs/ARCHITECTURE.md`](docs/ARCHITECTURE.md) — Host plugin, DPU agent, control plane, and data plane design. - [`docs/LMCache_INTEGRATION.md`](docs/LMCache_INTEGRATION.md) — KV-cache offload reference architecture. -- [`dpu-agent/README.md`](dpu-agent/README.md) — Build, run, and tune the DPU agent. +- [`blue-cache/README.md`](blue-cache/README.md) — Build, run, and tune the DPU-side agent. - [`examples/python/README.md`](examples/python/README.md) — Python end-to-end example. - `examples/standalone/` — Standalone host test tool that does not require NIXL. - [`CONTRIBUTING.md`](CONTRIBUTING.md) — Build, test, and NIXL upstreaming workflow. @@ -201,15 +202,33 @@ This project has been verified against **NIXL v1.1.0**. Other NIXL versions may ### NIXL build fails with `fatal error: toml++/toml.hpp: No such file or directory` -NIXL 1.1.0 uses `tomlplusplus` as a required dependency. If the telemetry plugin is enabled, the include path may not be propagated correctly. +NIXL 1.1.0 uses `tomlplusplus` as a required dependency. When the telemetry plugin is enabled, its `doca` backend may miss the `tomlplusplus` include path because `nixl_common_dep` is not listed in its dependencies. -Disable telemetry plugins before building: +**Recommended fix**: patch `src/plugins/telemetry/doca/meson.build` to add `nixl_common_dep`: + +```diff +# In src/plugins/telemetry/doca/meson.build +- dependencies: [nixl_infra, absl_log_dep, doca_dep], ++ dependencies: [nixl_infra, nixl_common_dep, absl_log_dep, doca_dep], +``` + +Then rebuild: + +```bash +cd /path/to/nixl/source +meson setup build --wipe -Denable_plugins=BLUE_CACHE +ninja -C build +``` + +This fix mirrors the upstream NIXL commit [`b98dd59`](https://github.com/ai-dynamo/nixl/commit/b98dd59f1f8854113ef38de5c3054b3e9294f0c9). It keeps telemetry enabled while correctly propagating the required include path. + +**Fallback**: If you do not need telemetry, disable the telemetry plugins entirely: ```bash cd /path/to/nixl/source sed -i "s/^subdir('telemetry')/# subdir('telemetry')/" src/plugins/meson.build -meson setup build --wipe -Denable_plugins=DOCA_DMA_PROXY +meson setup build --wipe -Denable_plugins=BLUE_CACHE ninja -C build ``` @@ -219,17 +238,17 @@ The C++ examples require CUDA Toolkit. On a machine without CUDA, disable exampl ```bash cmake .. -DBUILD_EXAMPLES=OFF -make dpu_dma_copy +make blue-cache ``` -Or build the DPU agent directly from the `dpu-agent/` directory: +Or build blue-cache directly from the `blue-cache/` directory: ```bash -cd dpu-agent +cd blue-cache ./scripts/build_dpu.sh ``` -### `DOCA_DMA_PROXY` plugin not found at runtime +### `BLUE_CACHE` plugin not found at runtime Set the plugin search path: @@ -243,7 +262,7 @@ Or in Python/C++ code: agent.add_plugin_directory("/opt/nvidia/nvda_nixl/lib/plugins") ``` -If NIXL was built with `-Dstatic_plugins=DOCA_DMA_PROXY`, the plugin is linked into `libnixl.so` and no search path is needed. +If NIXL was built with `-Dstatic_plugins=BLUE_CACHE`, the plugin is linked into `libnixl.so` and no search path is needed. ### `doca_dma.h` not found diff --git a/dpu-agent/CMakeLists.txt b/blue-cache/CMakeLists.txt similarity index 86% rename from dpu-agent/CMakeLists.txt rename to blue-cache/CMakeLists.txt index 4ecd4c1..c52069a 100644 --- a/dpu-agent/CMakeLists.txt +++ b/blue-cache/CMakeLists.txt @@ -4,7 +4,7 @@ # BlueField DPU Agent for BlueCache. # # Builds: -# - dpu_dma_copy : DPU-side service (requires DOCA + NIXL) +# - blue-cache : DPU-side service (requires DOCA + NIXL) cmake_minimum_required(VERSION 3.18) @@ -66,13 +66,13 @@ endif() include_directories(${NIXL_INCLUDE_DIR}) -add_executable(dpu_dma_copy - src/dpu_dma_copy.c +add_executable(blue-cache + src/blue_cache_agent.c src/storage_backend.cpp ) -set_source_files_properties(src/dpu_dma_copy.c PROPERTIES LANGUAGE CXX) -target_link_libraries(dpu_dma_copy - doca_dma_proxy_common +set_source_files_properties(src/blue_cache_agent.c PROPERTIES LANGUAGE CXX) +target_link_libraries(blue-cache + blue_cache_common ${DOCA_DMA_LIB} ${DOCA_COMMON_LIB} ${DOCA_COMCH_LIB} @@ -80,8 +80,8 @@ target_link_libraries(dpu_dma_copy ${NIXL_BUILD_LIBRARY} pthread ) -target_compile_options(dpu_dma_copy PRIVATE -Wall -Wextra) -set_target_properties(dpu_dma_copy PROPERTIES +target_compile_options(blue-cache PRIVATE -Wall -Wextra) +set_target_properties(blue-cache PROPERTIES BUILD_RPATH "${NIXL_ROOT}/lib;${NIXL_ROOT}/lib64" INSTALL_RPATH "${NIXL_ROOT}/lib;${NIXL_ROOT}/lib64" ) diff --git a/dpu-agent/README.md b/blue-cache/README.md similarity index 83% rename from dpu-agent/README.md rename to blue-cache/README.md index d058969..0c56a70 100644 --- a/dpu-agent/README.md +++ b/blue-cache/README.md @@ -1,12 +1,12 @@ -# DPU Agent (`dpu_dma_copy`) +# blue-cache -The `dpu_dma_copy` executable is the BlueField DPU agent. It runs on the DPU and executes DOCA DMA operations requested by the host. The matching host-side test client is in [`examples/standalone/`](../examples/standalone/). +The `blue-cache` executable is the BlueField DPU agent. It runs on the DPU and executes DOCA DMA operations requested by the host. The matching host-side test client is in [`examples/standalone/`](../examples/standalone/). For a full project overview, see the [root README](../README.md). ## Overview -`dpu_dma_copy` runs on the BlueField DPU and provides a simple request/response service for GPU↔DPU DMA transfers: +`blue-cache` runs on the BlueField DPU and provides a simple request/response service for GPU↔DPU DMA transfers: - **Push**: Host GPU → file on the DPU - **Pull**: File on the DPU → Host GPU @@ -48,7 +48,7 @@ Transfer flow: │ ├── storage_backend.h │ └── dma_transfer.h -> ../common/include/dma_transfer.h ├── src/ -│ ├── dpu_dma_copy.c +│ ├── blue_cache_agent.c │ └── storage_backend.cpp └── scripts/ ├── build_dpu.sh @@ -88,10 +88,10 @@ chmod +x scripts/build_host.sh Output: ```bash -examples/standalone/build-host/examples/standalone/gpu_dma_copy +examples/standalone/build-host/examples/standalone/blue-cache-host ``` -### DPU agent +### blue-cache ```bash chmod +x scripts/build_dpu.sh @@ -101,7 +101,7 @@ chmod +x scripts/build_dpu.sh Output: ```bash -build-dpu/dpu_dma_copy +build-dpu/blue-cache ``` Or build only the DPU agent from the project root: @@ -110,7 +110,7 @@ Or build only the DPU agent from the project root: cd /path/to/BlueCache mkdir -p build && cd build cmake .. -DBUILD_EXAMPLES=OFF -DDOCA_DIR=/opt/mellanox/doca -make dpu_dma_copy +make blue-cache ``` ## Run @@ -120,13 +120,13 @@ make dpu_dma_copy COMCH mode (default): ```bash -./build-dpu/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 +./build-dpu/blue-cache -p 0000:03:00.0 -m 256 -q 4 ``` TCP mode (fallback): ```bash -./build-dpu/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 -T +./build-dpu/blue-cache -p 0000:03:00.0 -m 256 -q 4 -T ``` DPU options: @@ -136,7 +136,7 @@ DPU options: - `-c`: Single DMA chunk size in MiB (auto-selected by default) - `-q`: Concurrent DMA task count (default `4`) - `-r`: DPU-side representor PCI address (auto-selected by default, COMCH only) -- `-S`: Comch service name (default `gpu_dpu_dma_copy`, COMCH only) +- `-S`: Comch service name (default `blue-cache`, COMCH only) - `-T`: Use TCP control plane instead of COMCH On startup the service prints the available DMA device, staging buffer size, maximum task size, and the effective chunk size / queue depth. @@ -146,14 +146,14 @@ On startup the service prints the available DMA device, staging buffer size, max COMCH mode: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o push -p 0000:ba:00.0 -g 3 -f /tmp/on_dpu.bin -s 256 ``` TCP mode: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o push -p 0000:ba:00.0 -g 3 -f /tmp/on_dpu.bin -s 256 -T ``` @@ -175,14 +175,14 @@ Host options: COMCH mode: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o pull -p 0000:ba:00.0 -g 3 -f /tmp/on_dpu.bin -O /tmp/from_gpu.bin ``` TCP mode: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o pull -p 0000:ba:00.0 -g 3 -f /tmp/on_dpu.bin -T -O /tmp/from_gpu.bin ``` @@ -260,20 +260,20 @@ is usually a performance hint rather than a hard failure. The DPU staging buffer On the DPU: ```bash -./build-dpu/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 +./build-dpu/blue-cache -p 0000:03:00.0 -m 256 -q 4 ``` On the host, push: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o push -p 0000:ba:00.0 -g 3 -f /tmp/test.bin -s 64 ``` On the host, pull: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o pull -p 0000:ba:00.0 -g 3 -f /tmp/test.bin -O /tmp/test.out ``` @@ -282,20 +282,20 @@ On the host, pull: On the DPU: ```bash -./build-dpu/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 -T +./build-dpu/blue-cache -p 0000:03:00.0 -m 256 -q 4 -T ``` On the host, push: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o push -p 0000:ba:00.0 -g 3 -f /tmp/test.bin -s 64 -T 192.168.100.2 ``` On the host, pull: ```bash -../examples/standalone/build-host/examples/standalone/gpu_dma_copy \ +../examples/standalone/build-host/examples/standalone/blue-cache-host \ -o pull -p 0000:ba:00.0 -g 3 -f /tmp/test.bin -T 192.168.100.2 -O /tmp/test.out ``` diff --git a/dpu-agent/include/dma_transfer.h b/blue-cache/include/dma_transfer.h similarity index 100% rename from dpu-agent/include/dma_transfer.h rename to blue-cache/include/dma_transfer.h diff --git a/dpu-agent/include/dpu_runtime.hpp b/blue-cache/include/dpu_runtime.hpp similarity index 100% rename from dpu-agent/include/dpu_runtime.hpp rename to blue-cache/include/dpu_runtime.hpp diff --git a/dpu-agent/include/storage_backend.h b/blue-cache/include/storage_backend.h similarity index 100% rename from dpu-agent/include/storage_backend.h rename to blue-cache/include/storage_backend.h diff --git a/dpu-agent/scripts/build_dpu.sh b/blue-cache/scripts/build_dpu.sh similarity index 62% rename from dpu-agent/scripts/build_dpu.sh rename to blue-cache/scripts/build_dpu.sh index 5aa264d..9981153 100755 --- a/dpu-agent/scripts/build_dpu.sh +++ b/blue-cache/scripts/build_dpu.sh @@ -8,18 +8,18 @@ BUILD_DIR="${PROJECT_DIR}/build-dpu" mkdir -p "$BUILD_DIR" cd "$BUILD_DIR" -echo "=== Building DPU-side DMA server ===" +echo "=== Building BlueCache DPU agent ===" cmake "$PROJECT_DIR" \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CROSSCOMPILING=ON \ -DDOCA_DIR="${DOCA_DIR:-/opt/mellanox/doca}" -make -j"$(nproc)" dpu_dma_copy +make -j"$(nproc)" blue-cache echo "" echo "Build complete:" -echo " ${BUILD_DIR}/dpu_dma_copy" +echo " ${BUILD_DIR}/blue-cache" echo "" echo "Run:" -echo " ./dpu_dma_copy -p [-r rep_pci] [-m stage_mib] [-c chunk_mib] [-q queue_depth] [-S service]" +echo " ./blue-cache -p [-r rep_pci] [-m stage_mib] [-c chunk_mib] [-q queue_depth] [-S service]" diff --git a/dpu-agent/scripts/preflight_check.sh b/blue-cache/scripts/preflight_check.sh similarity index 100% rename from dpu-agent/scripts/preflight_check.sh rename to blue-cache/scripts/preflight_check.sh diff --git a/dpu-agent/scripts/start_dpu_workers.sh b/blue-cache/scripts/start_dpu_workers.sh similarity index 76% rename from dpu-agent/scripts/start_dpu_workers.sh rename to blue-cache/scripts/start_dpu_workers.sh index d376540..db84eb1 100755 --- a/dpu-agent/scripts/start_dpu_workers.sh +++ b/blue-cache/scripts/start_dpu_workers.sh @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Start multiple dpu_dma_copy workers for parallel throughput verification. +# Start multiple blue-cache workers for parallel throughput verification. # Each worker listens on a separate TCP port and auto-restarts on client disconnect. set -e @@ -12,17 +12,17 @@ BASE_PORT="${BASE_PORT:-18518}" NUM_WORKERS="${NUM_WORKERS:-4}" STAGE_MIB="${STAGE_MIB:-64}" QUEUE_DEPTH="${QUEUE_DEPTH:-4}" -# Auto-detect dpu_dma_copy binary location +# Auto-detect blue-cache binary location SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" if [ -z "${DPU_DMA_COPY:-}" ] || [ ! -x "$DPU_DMA_COPY" ]; then - if [ -x "$SCRIPT_DIR/../build-dpu/dpu_dma_copy" ]; then - DPU_DMA_COPY="$SCRIPT_DIR/../build-dpu/dpu_dma_copy" - elif [ -x "$SCRIPT_DIR/dpu_dma_copy" ]; then - DPU_DMA_COPY="$SCRIPT_DIR/dpu_dma_copy" - elif [ -x "./dpu_dma_copy" ]; then - DPU_DMA_COPY="./dpu_dma_copy" + if [ -x "$SCRIPT_DIR/../build-dpu/blue-cache" ]; then + DPU_DMA_COPY="$SCRIPT_DIR/../build-dpu/blue-cache" + elif [ -x "$SCRIPT_DIR/blue-cache" ]; then + DPU_DMA_COPY="$SCRIPT_DIR/blue-cache" + elif [ -x "./blue-cache" ]; then + DPU_DMA_COPY="./blue-cache" else - echo "Error: dpu_dma_copy not found. Please set DPU_DMA_COPY env var or build first." >&2 + echo "Error: blue-cache not found. Please set DPU_DMA_COPY env var or build first." >&2 exit 1 fi fi @@ -69,10 +69,10 @@ cleanup() { kill "$pid" 2>/dev/null || true wait "$pid" 2>/dev/null || true done - # Also kill any lingering dpu_dma_copy processes on our ports + # Also kill any lingering blue-cache processes on our ports for i in $(seq 0 $((NUM_WORKERS-1))); do PORT=$((BASE_PORT + i)) - pkill -f "dpu_dma_copy.*-P $PORT" 2>/dev/null || true + pkill -f "blue-cache.*-P $PORT" 2>/dev/null || true done echo "Stopped." exit 0 diff --git a/dpu-agent/src/dpu_dma_copy.c b/blue-cache/src/blue_cache_agent.c similarity index 99% rename from dpu-agent/src/dpu_dma_copy.c rename to blue-cache/src/blue_cache_agent.c index 9f6e58a..f90b47f 100644 --- a/dpu-agent/src/dpu_dma_copy.c +++ b/blue-cache/src/blue_cache_agent.c @@ -1,5 +1,5 @@ /* - * dpu_dma_copy.c - Minimal DPU-side DMA copy service (Memory-backed version) + * blue_cache_agent.c - BlueCache DPU-side agent service (Memory-backed version) * * Control plane: DOCA Comch * Data plane: DOCA DMA with remote mmap import @@ -41,7 +41,7 @@ #define DEFAULT_STAGE_MIB 256ULL #define DEFAULT_QUEUE_DEPTH 4U -#define DEFAULT_SERVICE_NAME "gpu_dpu_dma_copy" +#define DEFAULT_SERVICE_NAME "blue-cache" struct transfer_stats { double dma_seconds; @@ -1231,7 +1231,7 @@ static void usage(const char *prog) int main(int argc, char **argv) { if (argc == 2 && strcmp(argv[1], "--version") == 0) { - printf("%s\n", DOCA_DMA_PROXY_VERSION); + printf("%s\n", BLUE_CACHE_VERSION); return EXIT_SUCCESS; } @@ -1346,7 +1346,7 @@ int main(int argc, char **argv) } APP_LOG_INFO("==========================================="); - APP_LOG_INFO(" Minimal DPU DMA Copy Server %s ", DOCA_DMA_PROXY_VERSION); + APP_LOG_INFO(" BlueCache DPU Agent %s ", BLUE_CACHE_VERSION); APP_LOG_INFO("==========================================="); APP_LOG_INFO("[DPU] PCI: %s", pci_addr); APP_LOG_INFO("[DPU] Stage buffer: %" PRIu64 " MiB", stage_mib); diff --git a/dpu-agent/src/storage_backend.cpp b/blue-cache/src/storage_backend.cpp similarity index 100% rename from dpu-agent/src/storage_backend.cpp rename to blue-cache/src/storage_backend.cpp diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 90137ac..4d32a71 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -27,8 +27,8 @@ set(COMMON_SOURCES set(COMMON_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include PARENT_SCOPE) -add_library(doca_dma_proxy_common STATIC ${COMMON_SOURCES}) -target_include_directories(doca_dma_proxy_common +add_library(blue_cache_common STATIC ${COMMON_SOURCES}) +target_include_directories(blue_cache_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include ${DOCA_INCLUDE_DIR} diff --git a/common/README.md b/common/README.md index 77ad281..196baa8 100644 --- a/common/README.md +++ b/common/README.md @@ -10,4 +10,4 @@ This directory contains the shared ABI between the NIXL host plugin and the Blue The host plugin and DPU agent are written in different languages/styles (C++ with NIXL logging vs C/CUDA with `app_log`), so their control-channel implementations are kept separate. The only hard contract is the wire protocol defined in `dma_transfer.h`. Both sides must use the same version of this file. -When the plugin is patched into the NIXL source tree via `scripts/patch_nixl.sh`, this header is copied into `src/plugins/doca_dma_proxy/` so the plugin directory is self-contained. +When the plugin is patched into the NIXL source tree via `scripts/patch_nixl.sh`, this header is copied into `src/plugins/blue_cache/` so the plugin directory is self-contained. diff --git a/common/include/app_log.h b/common/include/app_log.h index c3667fc..b67a35b 100644 --- a/common/include/app_log.h +++ b/common/include/app_log.h @@ -3,7 +3,7 @@ #include -#define DOCA_DMA_PROXY_VERSION "v0.7" +#define BLUE_CACHE_VERSION "v0.7" #ifdef __cplusplus extern "C" { diff --git a/common/include/dma_transfer.h b/common/include/dma_transfer.h index 31b5050..66dd603 100644 --- a/common/include/dma_transfer.h +++ b/common/include/dma_transfer.h @@ -3,8 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -#ifndef NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DMA_TRANSFER_H -#define NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DMA_TRANSFER_H +#ifndef NIXL_SRC_PLUGINS_BLUE_CACHE_DMA_TRANSFER_H +#define NIXL_SRC_PLUGINS_BLUE_CACHE_DMA_TRANSFER_H #include @@ -76,4 +76,4 @@ typedef struct { double dma_bandwidth_gbps; } dma_transfer_response_t; -#endif // NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DMA_TRANSFER_H +#endif // NIXL_SRC_PLUGINS_BLUE_CACHE_DMA_TRANSFER_H diff --git a/common/src/app_log.c b/common/src/app_log.c index 3112d59..0207aa5 100644 --- a/common/src/app_log.c +++ b/common/src/app_log.c @@ -14,14 +14,14 @@ static app_log_level_t app_log_level = APP_LOG_LEVEL_INFO; static void ensure_app_log_open(void) { if (app_log_is_open == 0) { - openlog("doca-dma-proxy", LOG_PID | LOG_CONS, LOG_USER); + openlog("blue-cache", LOG_PID | LOG_CONS, LOG_USER); app_log_is_open = 1; } } static app_log_level_t log_level_from_env(void) { - const char *env = getenv("DOCA_DMA_PROXY_LOG_LEVEL"); + const char *env = getenv("BLUE_CACHE_LOG_LEVEL"); if (env == NULL) return APP_LOG_LEVEL_INFO; diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index ca5df79..e95aedc 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -4,19 +4,19 @@ BlueCache enables high-throughput, CPU-bypass data movement between **Host GPU memory** and **BlueField DPU memory/storage**. It is implemented as two companion components: -1. **NIXL backend plugin (`DOCA_DMA_PROXY`)** — runs on the host, inside a NIXL application. -2. **DPU agent (`dpu_dma_copy`)** — runs on the BlueField DPU ARM cores. +1. **NIXL backend plugin (`BLUE_CACHE`)** — runs on the host, inside a NIXL application. +2. **DPU agent (`blue-cache`)** — runs on the BlueField DPU ARM cores. ```text Host DPU ┌─────────────────────────────┐ ┌─────────────────────────────┐ -│ Application (LMCache etc.) │ │ dpu_dma_copy agent │ +│ Application (LMCache etc.) │ │ blue-cache agent │ │ │ │ │ ┌─────────────────────┐ │ │ ▼ │ DOCA Comch │ │ DOCA DMA engine │ │ │ ┌─────────────┐ │ or TCP │ │ ┌───────────────┐ │ │ │ │ NIXL agent │ │◄────────────────►│ │ │ staging buffer │ │ │ -│ │ + DOCA_DMA_ │ │ control │ │ └───────┬───────┘ │ │ -│ │ PROXY │ │ messages │ │ │ │ │ +│ │ + BLUE_ │ │ control │ │ └───────┬───────┘ │ │ +│ │ CACHE │ │ messages │ │ │ │ │ │ └──────┬──────┘ │ │ │ ▼ │ │ │ │ │ │ │ ┌───────────────┐ │ │ │ ▼ VRAM_SEG │ │ │ │ NIXL storage │ │ │ @@ -67,9 +67,9 @@ For a **read** (`NIXL_READ`, DPU storage → GPU): 2. Host plugin exports a writable GPU mmap and sends `DMA_REQ_BATCH_PULL`. 3. DPU agent uses a pipelined reader/DMA worker to overlap storage reads with DMA back to GPU. -## DPU Agent Internals +## blue-cache Internals -The DPU agent (`dpu-agent/src/dpu_dma_copy.c`) maintains: +The DPU agent (`blue-cache/src/blue_cache_agent.c`) maintains: - A single DOCA DMA device/context/progress engine. - A reusable staging buffer registered once at startup. @@ -81,7 +81,7 @@ The DPU agent (`dpu-agent/src/dpu_dma_copy.c`) maintains: ## Host Plugin Internals -The host plugin (`nixl-plugin/src/doca_dma_proxy_backend.cpp`) implements the NIXL backend engine interface: +The host plugin (`nixl-plugin/src/blue_cache_backend.cpp`) implements the NIXL backend engine interface: - `registerMem` / `deregisterMem` — manage DOCA mmaps for GPU buffers and object descriptors. - `prepXfer` / `postXfer` — validate descriptor pairs and spawn a worker thread that sends batched control requests. diff --git a/docs/LMCache_INTEGRATION.md b/docs/LMCache_INTEGRATION.md index d737bbf..bb3fa94 100644 --- a/docs/LMCache_INTEGRATION.md +++ b/docs/LMCache_INTEGRATION.md @@ -23,7 +23,7 @@ The BlueCache backend moves the offload path to the BlueField DPU. The DPU's ARM │ └──────────┘ └────────┬────────┘ │ │ │ │ │ ┌─────────────────────────┘ │ -│ │ NIXL DOCA_DMA_PROXY plugin │ +│ │ NIXL BLUE_CACHE plugin │ │ │ - host_pci = 0000:ba:00.0 │ │ │ - gpu_id = 0 │ │ └─────────────────────────────────────┘ @@ -36,7 +36,7 @@ The BlueCache backend moves the offload path to the BlueField DPU. The DPU's ARM │ BlueField DPU │ │ │ ▼ │ │ ┌─────────────────┐ │ -│ │ dpu_dma_copy │ │ +│ │ blue-cache │ │ │ │ agent │ │ │ └────────┬────────┘ │ │ │ │ @@ -64,12 +64,12 @@ On the host, patch and build NIXL: cd BlueCache ./scripts/patch_nixl.sh /path/to/nixl/source cd /path/to/nixl/source -meson setup build -Denable_plugins=DOCA_DMA_PROXY +meson setup build -Denable_plugins=BLUE_CACHE ninja -C build ninja -C build install ``` -### 2. Start the DPU Agent +### 2. Start blue-cache On the BlueField DPU: @@ -77,27 +77,27 @@ On the BlueField DPU: cd BlueCache mkdir build && cd build cmake .. -DDOCA_DIR=/opt/mellanox/doca -DNIXL_ROOT=/opt/nvidia/nvda_nixl -make dpu_dma_copy +make blue-cache -./dpu-agent/dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 +./blue-cache/blue-cache -p 0000:03:00.0 -m 256 -q 4 ``` Use `-T` for TCP control mode if DOCA Comch is not configured. ### 3. Configure LMCache -In the LMCache configuration, select NIXL as the transfer backend and point it to the DOCA_DMA_PROXY plugin: +In the LMCache configuration, select NIXL as the transfer backend and point it to the BLUE_CACHE plugin: ```yaml # LMCache configuration example cache_backend: nixl nixl: - plugin: DOCA_DMA_PROXY + plugin: BLUE_CACHE plugin_params: host_pci: "0000:ba:00.0" gpu_id: "0" ctrl_mode: "auto" # or "comch" / "tcp" - service_name: "gpu_dpu_dma_copy" + service_name: "blue-cache" ``` When `ctrl_mode` is `tcp`, also provide `dpu_host` and `port`. diff --git a/examples/README.md b/examples/README.md index edaf9e0..2b2bbe6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,6 +1,6 @@ # Examples -This directory contains reference material and standalone programs that show how to use the BlueCache `DOCA_DMA_PROXY` backend. +This directory contains reference material and standalone programs that show how to use the BlueCache `BLUE_CACHE` backend. ## `python/` @@ -13,7 +13,7 @@ See [`python/README.md`](python/README.md) for requirements and usage. A minimal C++ program that: 1. Creates a NIXL agent. -2. Loads the `DOCA_DMA_PROXY` backend. +2. Loads the `BLUE_CACHE` backend. 3. Registers GPU memory (`VRAM_SEG`) and a DPU-resident object (`OBJ_SEG`). 4. Performs a `NIXL_WRITE` transfer from GPU to the DPU object. @@ -24,18 +24,18 @@ Build it as part of the top-level CMake project: ```bash cd build cmake .. -DNIXL_ROOT=/opt/nvidia/nvda_nixl -DBUILD_EXAMPLES=ON -make nixl_doca_dma_proxy_example +make nixl_blue_cache_example ``` Run it after the DPU agent is started: ```bash -./examples/cpp/nixl_doca_dma_proxy_example 0000:ba:00.0 /tmp/dpu_object.bin +./examples/cpp/nixl_blue_cache_example 0000:ba:00.0 /tmp/dpu_object.bin ``` ## `standalone/` -A standalone host-side test tool (`gpu_dma_copy`) that directly drives the DPU agent via DOCA Comch/TCP **without using NIXL**. This is useful for validating the basic GPU<->DPU DMA path before integrating the NIXL plugin. +A standalone host-side test tool (`blue-cache-host`) that directly drives the DPU agent via DOCA Comch/TCP **without using NIXL**. This is useful for validating the basic GPU<->DPU DMA path before integrating the NIXL plugin. **Dependencies**: DOCA + CUDA Toolkit (no NIXL required) @@ -51,13 +51,13 @@ Or from the top-level: ```bash cd build cmake .. -DBUILD_DPU_AGENT=OFF -DBUILD_EXAMPLES=ON -make gpu_dma_copy +make blue-cache-host ``` Run it after the DPU agent is started: ```bash -./examples/standalone/build-host/gpu_dma_copy \ +./examples/standalone/build-host/blue-cache-host \ -o push -p 0000:ba:00.0 -g 0 -f /tmp/test.bin -s 64 ``` @@ -68,6 +68,6 @@ LMCache + vLLM reference integration for KV-cache offload. Contents: - `lmcache_integration.patch` — Patches required on top of LMCache v0.4.3. -- `lmcache-config.yaml` — Sample configuration for the `DOCA_DMA_PROXY` backend. +- `lmcache-config.yaml` — Sample configuration for the `BLUE_CACHE` backend. See [`lmcache/README.md`](lmcache/README.md) for the full setup, patch instructions, and tuning guide. diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt index a487dc1..0a2843e 100644 --- a/examples/cpp/CMakeLists.txt +++ b/examples/cpp/CMakeLists.txt @@ -22,13 +22,13 @@ endif() include_directories(${NIXL_INCLUDE_DIR}) -add_executable(nixl_doca_dma_proxy_example nixl_doca_dma_proxy_example.cpp) -target_link_libraries(nixl_doca_dma_proxy_example +add_executable(nixl_blue_cache_example nixl_blue_cache_example.cpp) +target_link_libraries(nixl_blue_cache_example CUDA::cudart ${NIXL_LIBRARY} ${NIXL_BUILD_LIBRARY} ) -set_target_properties(nixl_doca_dma_proxy_example PROPERTIES +set_target_properties(nixl_blue_cache_example PROPERTIES BUILD_RPATH "${NIXL_ROOT}/lib;${NIXL_ROOT}/lib64" INSTALL_RPATH "${NIXL_ROOT}/lib;${NIXL_ROOT}/lib64" ) diff --git a/examples/cpp/nixl_doca_dma_proxy_example.cpp b/examples/cpp/nixl_blue_cache_example.cpp similarity index 94% rename from examples/cpp/nixl_doca_dma_proxy_example.cpp rename to examples/cpp/nixl_blue_cache_example.cpp index 9185e48..debe1c8 100644 --- a/examples/cpp/nixl_doca_dma_proxy_example.cpp +++ b/examples/cpp/nixl_blue_cache_example.cpp @@ -21,7 +21,7 @@ } while (0) int main(int argc, char **argv) { - std::string agent_name("doca_dma_proxy_example"); + std::string agent_name("blue_cache_example"); std::string host_pci = (argc > 1) ? argv[1] : "0000:ba:00.0"; std::string dpu_path = (argc > 2) ? argv[2] : "/tmp/dpu_object.bin"; int gpu_id = 0; @@ -42,13 +42,13 @@ int main(int argc, char **argv) { params["ctrl_mode"] = "auto"; nixlBackendH *backend = nullptr; - nixl_status_t status = agent.createBackend("DOCA_DMA_PROXY", params, backend); + nixl_status_t status = agent.createBackend("BLUE_CACHE", params, backend); if (status != NIXL_SUCCESS) { - std::cerr << "Failed to create DOCA_DMA_PROXY backend: " + std::cerr << "Failed to create BLUE_CACHE backend: " << nixlEnumStrings::statusStr(status) << std::endl; return 1; } - std::cout << "[OK] Created DOCA_DMA_PROXY backend" << std::endl; + std::cout << "[OK] Created BLUE_CACHE backend" << std::endl; nixl_opt_args_t opt_args; opt_args.backends.push_back(backend); diff --git a/examples/lmcache/README.md b/examples/lmcache/README.md index c19cd7a..1102fd1 100644 --- a/examples/lmcache/README.md +++ b/examples/lmcache/README.md @@ -1,11 +1,11 @@ # LMCache KV-Cache Offload with BlueCache -This directory contains the reference integration of LMCache with the NIXL `DOCA_DMA_PROXY` backend for GPU-DPU KV-cache offload. +This directory contains the reference integration of LMCache with the NIXL `BLUE_CACHE` backend for GPU-DPU KV-cache offload. It includes: -- `lmcache_integration.patch` — Patches needed on top of LMCache v0.4.3 to enable the `DOCA_DMA_PROXY` storage backend. -- `lmcache-config.yaml` — Sample LMCache configuration for the `DOCA_DMA_PROXY` backend. +- `lmcache_integration.patch` — Patches needed on top of LMCache v0.4.3 to enable the `BLUE_CACHE` storage backend. +- `lmcache-config.yaml` — Sample LMCache configuration for the `BLUE_CACHE` backend. ## Architecture @@ -13,7 +13,7 @@ It includes: ┌─────────────────────────────────────────────────────────────────────┐ │ Host │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ -│ │ vLLM / │───▶│ LMCache │───▶│ NIXL + DOCA_DMA_PROXY │ │ +│ │ vLLM / │───▶│ LMCache │───▶│ NIXL + BLUE_CACHE │ │ │ │ SGLang │ │ Engine │ │ - export GPU mmap │ │ │ │ │◀───│ │◀───│ - send control request │ │ │ └─────────────┘ └─────────────┘ └─────────────────────────┘ │ @@ -26,7 +26,7 @@ It includes: ┌───────────────────┼──────────────────────────┼──────────────────────┐ │ BlueField DPU │ ▼ │ │ │ ┌─────────────────────────┐ │ -│ │ │ dpu_dma_copy agent │ │ +│ │ │ blue-cache agent │ │ │ │ │ - import remote mmap │ │ │ │ │ - run DOCA DMA │ │ │ │ │ - POSIX storage backend│ │ @@ -46,7 +46,7 @@ LMCache offloads KV tensors from GPU HBM to DPU-local storage through NIXL. The ## Prerequisites - LMCache v0.4.3 source code -- NIXL v1.1.0 built with the `DOCA_DMA_PROXY` plugin +- NIXL v1.1.0 built with the `BLUE_CACHE` plugin - BlueField DPU with DOCA runtime - Host with NVIDIA GPU, DOCA host package, and `nvidia-peermem` loaded @@ -56,20 +56,20 @@ LMCache offloads KV tensors from GPU HBM to DPU-local storage through NIXL. The cd /path/to/BlueCache ./scripts/patch_nixl.sh /path/to/nixl-1.1.0-source cd /path/to/nixl-1.1.0-source -meson setup build -Denable_plugins=DOCA_DMA_PROXY +meson setup build -Denable_plugins=BLUE_CACHE ninja -C build sudo ninja -C build install ``` The default install prefix is `/opt/nvidia/nvda_nixl`. -## Step 2: Start the DPU Agent +## Step 2: Start blue-cache On the BlueField DPU: ```bash -cd /path/to/BlueCache/dpu-agent -./build/dpu_dma_copy \ +cd /path/to/BlueCache/blue-cache +./build/blue-cache \ -p 0000:03:00.0 \ -m 2048 \ -q 64 \ @@ -90,22 +90,6 @@ cd /path/to/BlueCache/examples/lmcache ./patch_lmcache.sh /path/to/LMCache ``` -Or apply the patch manually: - -```bash -cd /path/to/LMCache -patch -p1 < /path/to/BlueCache/examples/lmcache/lmcache_integration.patch -``` - -If you want to regenerate the patch yourself: - -```bash -cd /path/to/LMCache -git diff main dev > lmcache_integration.patch -``` - -If the patch fails due to upstream LMCache changes, review the rejected hunks (`*.rej`) and adjust manually. The patch is specifically against **v0.4.3**; newer LMCache versions may require manual adaptation. - ## Step 4: Configure LMCache Use the provided sample configuration: @@ -121,7 +105,7 @@ Edit the environment variables or values to match your environment: | `LMCACHE_NIXL_BUFFER_SIZE` | GPU staging buffer size in bytes. Must be aligned to `chunk_size * kv_shape_bytes`. | | `LMCACHE_HOST_PCI` | Host-side BlueField PCI address, e.g. `0000:ba:00.0` | | `chunk_size` | KV cache chunk size (tokens) | -| `nixl_backend` | Must be `DOCA_DMA_PROXY` | +| `nixl_backend` | Must be `BLUE_CACHE` | | `nixl_backend_params.ctrl_mode` | `comch` or `tcp` | | `nixl_backend_params.dpu_host` | DPU IP when `ctrl_mode=tcp` | | `nixl_backend_params.port` | TCP port when `ctrl_mode=tcp` | @@ -153,7 +137,7 @@ The exact vLLM launch command depends on the LMCache and vLLM versions. Refer to 1. **Write (GPU → DPU storage)** - vLLM generates KV tensors in GPU HBM. - LMCache evicts cold chunks to the NIXL storage backend. - - The `DOCA_DMA_PROXY` backend exports the GPU buffer and sends a batched request to the DPU agent. + - The `BLUE_CACHE` backend exports the GPU buffer and sends a batched request to the DPU agent. - The DPU agent imports the GPU mmap and DMAs chunks into its staging buffer. - The DPU agent writes chunks to local storage via its POSIX backend. @@ -185,7 +169,7 @@ Disable telemetry plugins: ```bash cd /path/to/nixl-1.1.0-source sed -i "s/^subdir('telemetry')/# subdir('telemetry')/" src/plugins/meson.build -meson setup build --wipe -Denable_plugins=DOCA_DMA_PROXY +meson setup build --wipe -Denable_plugins=BLUE_CACHE ninja -C build ``` @@ -193,12 +177,12 @@ ninja -C build If LMCache has diverged from v0.4.3, apply the patch manually by reviewing the changes in `lmcache_integration.patch`. The main integration points are: -- `lmcache/v1/storage_backend/nixl_storage_backend.py` — adds `DOCA_DMA_PROXY` backend support +- `lmcache/v1/storage_backend/nixl_storage_backend.py` — adds `BLUE_CACHE` backend support - `lmcache/v1/cache_engine.py` — lookup/logic adjustments - `lmcache/v1/memory_management.py` — memory allocator adjustments for NIXL buffers ## References - [`docs/ARCHITECTURE.md`](../../docs/ARCHITECTURE.md) — Backend design details. -- [`dpu-agent/README.md`](../../dpu-agent/README.md) — DPU agent usage. +- [`blue-cache/README.md`](../../blue-cache/README.md) — DPU agent usage. - [`examples/python/`](../python/) — Standalone NIXL Python example without LMCache. diff --git a/examples/lmcache/lmcache-config.yaml b/examples/lmcache/lmcache-config.yaml index ac49dd5..4c5634f 100644 --- a/examples/lmcache/lmcache-config.yaml +++ b/examples/lmcache/lmcache-config.yaml @@ -1,5 +1,5 @@ -# LMCache Configuration for DOCA_DMA_PROXY (GPU-DPU DMA Transfer) +# LMCache Configuration for BLUE_CACHE (GPU-DPU DMA Transfer) # This configuration enables direct GPU-to-DPU KV cache transfers via NVIDIA DOCA chunk_size: 256 @@ -7,7 +7,7 @@ local_cpu: false max_local_cpu_size: 0 remote_url: null -# NIXL Storage Configuration for DOCA +# NIXL Storage Configuration for BlueCache # These are top-level configurations for NIXL # # IMPORTANT: nixl_buffer_size MUST be a multiple of chunk_size * kv_shape_bytes @@ -17,16 +17,16 @@ remote_url: null # # Formula: nixl_buffer_size = num_chunks * (num_layers * 2 * chunk_size * num_kv_heads * head_size * dtype_size) # -nixl_buffer_size: ${LMCACHE_NIXL_BUFFER_SIZE} -nixl_buffer_device: "cuda" # Must be "cuda" for DOCA (GPU-DPU transfer) +nixl_buffer_size: 2936012800 +nixl_buffer_device: "cuda" # Must be "cuda" for BlueCache (GPU-DPU transfer) # Extra configuration (required for NIXL storage) extra_config: # Enable NIXL storage backend enable_nixl_storage: true - # DOCA_DMA_PROXY backend for GPU-DPU transfers - nixl_backend: "DOCA_DMA_PROXY" + # BLUE_CACHE backend for GPU-DPU transfers + nixl_backend: "BLUE_CACHE" # Pool size: number of concurrent transfers (adjust based on workload) # Note: should be >= batch size for batched operations (test uses 4) @@ -35,10 +35,10 @@ extra_config: # Backend-specific parameters nixl_backend_params: # BlueField DPU PCI address (host side) - host_pci: "${LMCACHE_HOST_PCI}" + host_pci: "ba:00.0@10.75.70.125:18517" # DOCA service name - service_name: "gpu_dpu_dma_copy" + service_name: "blue-cache" # Control channel mode: "comch" (COMCH) or "tcp" ctrl_mode: "tcp" diff --git a/examples/lmcache/lmcache_integration.patch b/examples/lmcache/lmcache_integration.patch index 80ce4bc..4eea97f 100644 --- a/examples/lmcache/lmcache_integration.patch +++ b/examples/lmcache/lmcache_integration.patch @@ -32,7 +32,7 @@ index 657abac..9ebf5d0 100644 def validate_nixl_backend(dynamic_storage: bool, backend: str, device: str): if dynamic_storage: # For now only supports OBJ backend - if backend in ("OBJ",): -+ if backend in ("OBJ", "DOCA_DMA_PROXY"): ++ if backend in ("OBJ", "BLUE_CACHE"): return device == "cpu" or device == "cuda" else: return False @@ -93,7 +93,7 @@ index 657abac..9ebf5d0 100644 class NixlDynamicStorageAgent(NixlStorageAgent): + """ -+ Dynamic storage agent for NIXL OBJ/DOCA_DMA_PROXY backends. ++ Dynamic storage agent for NIXL OBJ/BLUE_CACHE backends. + + Workaround for NIXL populate bug with frequent register/deregister: + - Uses globally unique device_id per registration to avoid base conflicts @@ -111,7 +111,7 @@ index 657abac..9ebf5d0 100644 if backend == "OBJ": self.mem_type = "OBJ" -+ elif backend == "DOCA_DMA_PROXY": ++ elif backend == "BLUE_CACHE": + self.mem_type = "OBJ" else: # Already validated in validate_nixl_backend @@ -281,7 +281,7 @@ index e55ce86..c47cba7 100644 + elif "LocalCPUBackend" in self.storage_backends: allocator_backend = self.storage_backends["LocalCPUBackend"] + elif "NixlStorageBackend" in self.storage_backends: -+ # NIXL backend (e.g., DOCA_DMA_PROXY) is itself an allocator ++ # NIXL backend (e.g., BLUE_CACHE) is itself an allocator + allocator_backend = self.storage_backends["NixlStorageBackend"] + else: + # Fallback: find any available AllocatorBackendInterface diff --git a/examples/lmcache/patch_lmcache.sh b/examples/lmcache/patch_lmcache.sh index 2ec530f..c2bf462 100755 --- a/examples/lmcache/patch_lmcache.sh +++ b/examples/lmcache/patch_lmcache.sh @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Apply the DOCA_DMA_PROXY LMCache integration patch to an LMCache v0.4.3 source tree. +# Apply the BLUE_CACHE LMCache integration patch to an LMCache v0.4.3 source tree. # # Usage: # ./patch_lmcache.sh /path/to/lmcache/source @@ -47,8 +47,7 @@ echo "Patch file: $PATCH_FILE" echo "" # Check if patch is already applied by looking for a known marker. -# The dev branch adds DOCA_DMA_PROXY validation in nixl_storage_backend.py. -if grep -q "DOCA_DMA_PROXY" "${LMCACHE_SRC}/lmcache/v1/storage_backend/nixl_storage_backend.py" 2>/dev/null; then +if grep -q "BLUE_CACHE" "${LMCACHE_SRC}/lmcache/v1/storage_backend/nixl_storage_backend.py" 2>/dev/null; then echo "It looks like the patch has already been applied. Skipping." exit 0 fi diff --git a/examples/python/README.md b/examples/python/README.md index e68a6ed..860f56b 100644 --- a/examples/python/README.md +++ b/examples/python/README.md @@ -1,6 +1,6 @@ # Python Example -`nixl_doca_dma_proxy_example.py` demonstrates Host GPU ↔ DPU file transfer via the NIXL Python API using the `DOCA_DMA_PROXY` backend. +`nixl_blue_cache_example.py` demonstrates Host GPU ↔ DPU file transfer via the NIXL Python API using the `BLUE_CACHE` backend. After building both the DPU Agent and the NIXL plugin, this script is the quickest way to verify that the whole pipeline works. @@ -9,24 +9,24 @@ After building both the DPU Agent and the NIXL plugin, this script is the quicke - NIXL Python package installed (`pip install nixl[cu12]` or `nixl[cu13]`) - PyTorch with CUDA support - DOCA runtime on the host -- DPU agent (`dpu_dma_copy`) running on the BlueField DPU +- DPU agent (`blue-cache`) running on the BlueField DPU ## Environment Setup -The NIXL Python bindings need to locate the `DOCA_DMA_PROXY` plugin shared library. Set the plugin directory before running the script: +The NIXL Python bindings need to locate the `BLUE_CACHE` plugin shared library. Set the plugin directory before running the script: ```bash export NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/plugins ``` -If the plugin is installed elsewhere, point `NIXL_PLUGIN_DIR` to the directory containing `libplugin_DOCA_DMA_PROXY.so`. +If the plugin is installed elsewhere, point `NIXL_PLUGIN_DIR` to the directory containing `libplugin_BLUE_CACHE.so`. -## 1. Start the DPU Agent +## 1. Start blue-cache -On the BlueField DPU, start `dpu_dma_copy`. The example below uses TCP control mode (`-T`) and the POSIX storage backend (`-b posix`): +On the BlueField DPU, start `blue-cache`. The example below uses TCP control mode (`-T`) and the POSIX storage backend (`-b posix`): ```bash -./dpu-agent/dpu_dma_copy \ +./blue-cache/blue-cache \ -p 0000:03:00.0 \ -m 2048 \ -q 64 \ @@ -53,7 +53,7 @@ Parameters: ```bash export NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/plugins -python3 nixl_doca_dma_proxy_example.py \ +python3 nixl_blue_cache_example.py \ -o push \ -p 0000:ba:00.0 \ -f /data/test_obj \ @@ -68,7 +68,7 @@ python3 nixl_doca_dma_proxy_example.py \ ```bash export NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/plugins -python3 nixl_doca_dma_proxy_example.py \ +python3 nixl_blue_cache_example.py \ -o pull \ -p 0000:ba:00.0 \ -f /data/test_obj \ @@ -97,5 +97,5 @@ python3 nixl_doca_dma_proxy_example.py \ ## Notes - The example registers GPU memory as `VRAM` and DPU-resident objects as `OBJ`. -- Make sure the memory types match what the `DOCA_DMA_PROXY` plugin supports. +- Make sure the memory types match what the `BLUE_CACHE` plugin supports. - If using COMCH mode, omit `-d` and `-m tcp`. diff --git a/examples/python/nixl_doca_dma_proxy_example.py b/examples/python/nixl_blue_cache_example.py similarity index 89% rename from examples/python/nixl_doca_dma_proxy_example.py rename to examples/python/nixl_blue_cache_example.py index 13fbb39..09bca90 100644 --- a/examples/python/nixl_doca_dma_proxy_example.py +++ b/examples/python/nixl_blue_cache_example.py @@ -15,31 +15,31 @@ # limitations under the License. """ -DOCA_DMA_PROXY NIXL Plugin Python Example (with Batch Transfer Support) +BLUE_CACHE NIXL Plugin Python Example (with Batch Transfer Support) Demonstrates Host GPU <-> DPU file transfer via NIXL Python API using - the DOCA_DMA_PROXY backend (COMCH or TCP control channel). + the BLUE_CACHE backend (COMCH or TCP control channel). Supports both single-file and batched multi-segment transfers. Usage: # Single push (Host GPU -> DPU file) - python nixl_doca_dma_proxy_example.py -o push -p 0000:1a:00.0 -f /tmp/test.bin -s 64 -g 0 + python nixl_blue_cache_example.py -o push -p 0000:1a:00.0 -f /tmp/test.bin -s 64 -g 0 # Batch push (Host GPU -> DPU multiple files) - python nixl_doca_dma_proxy_example.py -o push -p 0000:1a:00.0 -f /tmp/test.bin -s 64 -g 0 --batch-size 8 + python nixl_blue_cache_example.py -o push -p 0000:1a:00.0 -f /tmp/test.bin -s 64 -g 0 --batch-size 8 # Single pull (DPU file -> Host GPU) - python nixl_doca_dma_proxy_example.py -o pull -p 0000:1a:00.0 -f /tmp/test.bin -g 0 + python nixl_blue_cache_example.py -o pull -p 0000:1a:00.0 -f /tmp/test.bin -g 0 # Batch pull (DPU multiple files -> Host GPU) - python nixl_doca_dma_proxy_example.py -o pull -p 0000:1a:00.0 -f /tmp/test.bin -g 0 --batch-size 8 + python nixl_blue_cache_example.py -o pull -p 0000:1a:00.0 -f /tmp/test.bin -g 0 --batch-size 8 Requirements: - nixl python package installed (pip install nixl[cu12] or cu13) - PyTorch with CUDA support - DOCA 3.3 runtime on the host - - DPU side running dpu_dma_copy daemon + - DPU side running blue-cache daemon """ import argparse @@ -55,7 +55,7 @@ def parse_args(): - parser = argparse.ArgumentParser(description="NIXL DOCA_DMA_PROXY Python Example") + parser = argparse.ArgumentParser(description="NIXL BLUE_CACHE Python Example") parser.add_argument("-o", "--mode", required=True, choices=["push", "pull"], help="Transfer mode: push or pull") parser.add_argument("-p", "--host-pci", required=True, @@ -79,8 +79,8 @@ def parse_args(): help="DPU IP for TCP mode (e.g., 192.168.100.2)") parser.add_argument("-P", "--port", type=int, default=18517, help="TCP port (default: 18517)") - parser.add_argument("-S", "--service-name", default="gpu_dpu_dma_copy", - help="COMCH service name (default: gpu_dpu_dma_copy)") + parser.add_argument("-S", "--service-name", default="blue-cache", + help="COMCH service name (default: blue-cache)") parser.add_argument("-O", "--output", help="Output file path for pull mode (default: /tmp/test_pulled.bin)") parser.add_argument("-b", "--batch-size", type=int, default=1, @@ -102,7 +102,7 @@ def main(): agent_name = "test_doca_dma_agent_py" logger.info("=" * 60) - logger.info("NIXL DOCA_DMA_PROXY Python Example") + logger.info("NIXL BLUE_CACHE Python Example") logger.info("Mode: %s Batch: %d Host PCI: %s GPU: %d", args.mode, batch_size, args.host_pci, args.gpu_id) if is_push: @@ -126,15 +126,15 @@ def main(): # 2. Check plugin availability # ------------------------------------------------------------------ plugins = agent.get_plugin_list() - if "DOCA_DMA_PROXY" not in plugins: - logger.error("DOCA_DMA_PROXY plugin not available. Plugins: %s", plugins) + if "BLUE_CACHE" not in plugins: + logger.error("BLUE_CACHE plugin not available. Plugins: %s", plugins) sys.exit(1) - logger.info("DOCA_DMA_PROXY plugin available") + logger.info("BLUE_CACHE plugin available") # ------------------------------------------------------------------ # 3. Create backend # ------------------------------------------------------------------ - logger.info("[2] Creating DOCA_DMA_PROXY backend...") + logger.info("[2] Creating BLUE_CACHE backend...") backend_params = { "host_pci": args.host_pci, "gpu_id": str(args.gpu_id), @@ -145,7 +145,7 @@ def main(): backend_params["dpu_host"] = args.dpu_host backend_params["port"] = str(args.port) - agent.create_backend("DOCA_DMA_PROXY", backend_params) + agent.create_backend("BLUE_CACHE", backend_params) logger.info("Backend created successfully") # ------------------------------------------------------------------ @@ -192,7 +192,7 @@ def main(): # 5. Register VRAM (single registration for the whole buffer) # ------------------------------------------------------------------ logger.info("[4] Registering GPU memory...") - vram_reg = agent.register_memory(gpu_buffer, backends=["DOCA_DMA_PROXY"]) + vram_reg = agent.register_memory(gpu_buffer, backends=["BLUE_CACHE"]) if not vram_reg: logger.error("Failed to register VRAM") sys.exit(1) @@ -213,7 +213,7 @@ def main(): obj_reg = agent.register_memory( [(0, seg_size, 0, dpu_path)], "OBJ", - backends=["DOCA_DMA_PROXY"], + backends=["BLUE_CACHE"], ) if not obj_reg: logger.error("Failed to register OBJ: %s", dpu_path) @@ -228,7 +228,7 @@ def main(): try: resp = agent.query_memory( [(0, 0, 0, dpu_paths[0])], - "DOCA_DMA_PROXY", + "BLUE_CACHE", mem_type="OBJ", ) if resp and resp[0] is not None: @@ -324,9 +324,9 @@ def main(): # ------------------------------------------------------------------ logger.info("[9] Cleaning up...") agent.release_xfer_handle(xfer_handle) - agent.deregister_memory(vram_reg, backends=["DOCA_DMA_PROXY"]) + agent.deregister_memory(vram_reg, backends=["BLUE_CACHE"]) for obj_reg in obj_regs: - agent.deregister_memory(obj_reg, backends=["DOCA_DMA_PROXY"]) + agent.deregister_memory(obj_reg, backends=["BLUE_CACHE"]) del gpu_buffer logger.info("Cleanup complete") diff --git a/examples/standalone/CMakeLists.txt b/examples/standalone/CMakeLists.txt index 7b6e42e..4fa31d8 100644 --- a/examples/standalone/CMakeLists.txt +++ b/examples/standalone/CMakeLists.txt @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Standalone host-side test tool for the DPU agent. +# Standalone host-side test tool (blue-cache-host) for the DPU agent. # # This tool does NOT depend on NIXL. It directly uses DOCA Comch/TCP to drive # the DPU agent and is useful for validating the basic GPU<->DPU DMA path. @@ -38,14 +38,14 @@ find_library(DOCA_COMCH_LIB doca_comch REQUIRED ) -add_executable(gpu_dma_copy src/gpu_dma_copy.cu) -target_link_libraries(gpu_dma_copy - doca_dma_proxy_common +add_executable(blue-cache-host src/blue_cache_host.cu) +target_link_libraries(blue-cache-host + blue_cache_common CUDA::cudart ${DOCA_DMA_LIB} ${DOCA_COMMON_LIB} ${DOCA_COMCH_LIB} ) -target_compile_options(gpu_dma_copy PRIVATE +target_compile_options(blue-cache-host PRIVATE $<$:-Xcompiler -Wall> ) diff --git a/examples/standalone/scripts/build_host.sh b/examples/standalone/scripts/build_host.sh index 90e0adb..e5e9a9a 100755 --- a/examples/standalone/scripts/build_host.sh +++ b/examples/standalone/scripts/build_host.sh @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Build the standalone host-side GPU DMA test tool. +# Build the standalone host-side BlueCache test tool (blue-cache-host). set -e @@ -12,7 +12,7 @@ BUILD_DIR="${SCRIPT_DIR}/../build-host" mkdir -p "$BUILD_DIR" -echo "=== Building standalone host-side GPU DMA client ===" +echo "=== Building standalone host-side BlueCache client ===" cmake -S "$PROJECT_DIR" -B "$BUILD_DIR" \ -DBUILD_DPU_AGENT=OFF \ @@ -20,14 +20,14 @@ cmake -S "$PROJECT_DIR" -B "$BUILD_DIR" \ -DCMAKE_BUILD_TYPE=Release \ -DDOCA_DIR="${DOCA_DIR:-/opt/mellanox/doca}" -cmake --build "$BUILD_DIR" --target gpu_dma_copy -j"$(nproc)" +cmake --build "$BUILD_DIR" --target blue-cache-host -j"$(nproc)" echo "" echo "Build complete:" -echo " ${BUILD_DIR}/examples/standalone/gpu_dma_copy" +echo " ${BUILD_DIR}/examples/standalone/blue-cache-host" echo "" echo "Push GPU -> DPU file:" -echo " ./gpu_dma_copy -o push -p -g -f -s [-S service]" +echo " ./blue-cache-host -o push -p -g -f -s [-S service]" echo "" echo "Pull DPU file -> GPU:" -echo " ./gpu_dma_copy -o pull -p -g -f [-O LOCAL_OUT] [-S service]" +echo " ./blue-cache-host -o pull -p -g -f [-O LOCAL_OUT] [-S service]" diff --git a/examples/standalone/src/gpu_dma_copy.cu b/examples/standalone/src/blue_cache_host.cu similarity index 98% rename from examples/standalone/src/gpu_dma_copy.cu rename to examples/standalone/src/blue_cache_host.cu index 4f7fc53..eb6beed 100644 --- a/examples/standalone/src/gpu_dma_copy.cu +++ b/examples/standalone/src/blue_cache_host.cu @@ -1,5 +1,5 @@ /* - * gpu_dma_copy.cu - Minimal host-side GPU DMA copy client + * blue_cache_host.cu - BlueCache host-side standalone test client * * Supported flows: * 1. push: host GPU -> DPU file @@ -25,7 +25,7 @@ extern "C" { #include "doca_device_utils.h" } -#define DEFAULT_SERVICE_NAME "gpu_dpu_dma_copy" +#define DEFAULT_SERVICE_NAME "blue-cache" #define CUDA_CHECK(call) \ do { \ @@ -215,7 +215,7 @@ static void usage(const char *prog) int main(int argc, char **argv) { if (argc == 2 && strcmp(argv[1], "--version") == 0) { - printf("%s\n", DOCA_DMA_PROXY_VERSION); + printf("%s\n", BLUE_CACHE_VERSION); return EXIT_SUCCESS; } @@ -286,7 +286,7 @@ int main(int argc, char **argv) return EXIT_FAILURE; } - APP_LOG_INFO("[HOST] doca-dma-proxy %s", DOCA_DMA_PROXY_VERSION); + APP_LOG_INFO("[HOST] blue-cache-host %s", BLUE_CACHE_VERSION); CUDA_CHECK(cudaSetDevice(gpu_id)); { diff --git a/nixl-plugin/README.md b/nixl-plugin/README.md index feea654..9365041 100644 --- a/nixl-plugin/README.md +++ b/nixl-plugin/README.md @@ -1,4 +1,4 @@ -# BlueCache NIXL Plugin (DOCA_DMA_PROXY) +# BlueCache NIXL Plugin (BLUE_CACHE) > This plugin is part of [BlueCache](https://github.com/BaizeAI/BlueCache). It is designed to be upstreamed to the NIXL main repository via the `scripts/patch_nixl.sh` helper. @@ -33,7 +33,7 @@ Key characteristics: The plugin is built conditionally. Enable it with: ```bash -meson setup build -Denable_plugins=DOCA_DMA_PROXY +meson setup build -Denable_plugins=BLUE_CACHE ``` If DOCA is not found, the plugin is silently skipped (`required: false`). @@ -46,7 +46,7 @@ If DOCA is not found, the plugin is silently skipped (`required: false`). |-----------|------|----------|---------|-------------| | `host_pci` | string | Yes | — | Host-side BlueField PCI address, e.g. `0000:ba:00.0` | | `ctrl_mode` | string | No | `auto` | Control channel: `comch`, `tcp`, or `auto` | -| `service_name` | string | No | `gpu_dpu_dma_copy` | DOCA Comch service name | +| `service_name` | string | No | `blue-cache` | DOCA Comch service name | | `dpu_host` | string | No* | — | DPU IP for TCP mode (required if `ctrl_mode=tcp`) | | `port` | int | No | `18517` | TCP port for control channel | | `gpu_id` | int | No | `0` | GPU device ID for PCI affinity lookup | @@ -68,10 +68,10 @@ params["gpu_id"] = "0"; nixlBackendInitParams init; init.localAgent = "my_agent"; -init.type = "DOCA_DMA_PROXY"; +init.type = "BLUE_CACHE"; init.customParams = ¶ms; -nixlBackendEngine *engine = new nixlDocaDmaProxyEngine(&init); +nixlBackendEngine *engine = new nixlBlueCacheEngine(&init); if (engine->getInitErr()) { // handle initialization failure } @@ -94,23 +94,23 @@ engine->registerMem(obj, OBJ_SEG, obj_md); // ... prepXfer / postXfer / checkXfer ... ``` -## DPU Agent +## blue-cache -This plugin requires a companion DPU agent running on the BlueField DPU. The agent is maintained in the same project under [`dpu-agent/`](../dpu-agent/). +This plugin requires a companion DPU agent running on the BlueField DPU. The agent is maintained in the same project under [`blue-cache/`](../blue-cache/). The agent receives transfer requests from this plugin via the control channel, executes DOCA DMA operations, and delegates final storage delivery to its own NIXL backends (e.g. `POSIX`, `OBJ`). -### Quick Start (DPU Agent) +### Quick Start (blue-cache) ```bash # On DPU -dpu_dma_copy -p 0000:03:00.0 -m 256 -q 4 +blue-cache -p 0000:03:00.0 -m 256 -q 4 # On Host (COMCH mode) -gpu_dma_copy -o push -p 0000:ba:00.0 -g 0 -f /dpu/path -s 64 +blue-cache-host -o push -p 0000:ba:00.0 -g 0 -f /dpu/path -s 64 # On Host (TCP fallback) -gpu_dma_copy -o push -p 0000:ba:00.0 -g 0 -f /dpu/path -s 64 -T +blue-cache-host -o push -p 0000:ba:00.0 -g 0 -f /dpu/path -s 64 -T ``` ## Notes diff --git a/nixl-plugin/src/doca_dma_proxy_backend.cpp b/nixl-plugin/src/blue_cache_backend.cpp similarity index 77% rename from nixl-plugin/src/doca_dma_proxy_backend.cpp rename to nixl-plugin/src/blue_cache_backend.cpp index c861334..54b2328 100644 --- a/nixl-plugin/src/doca_dma_proxy_backend.cpp +++ b/nixl-plugin/src/blue_cache_backend.cpp @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include "doca_dma_proxy_backend.h" +#include "blue_cache_backend.h" #include "common/nixl_log.h" #include #include @@ -28,7 +28,7 @@ extern "C" { } -#define DEFAULT_SERVICE_NAME "gpu_dpu_dma_copy" +#define DEFAULT_SERVICE_NAME "blue-cache" #define DEFAULT_TCP_PORT 18517 @@ -40,7 +40,7 @@ struct HostEndpointCandidate { static void cudaCheck(cudaError_t err, const char* file, int line) { if (err != cudaSuccess) { - NIXL_ERROR << "[DocaDmaProxy] CUDA error at " << file << ":" << line + NIXL_ERROR << "[BlueCache] CUDA error at " << file << ":" << line << ": " << cudaGetErrorString(err); throw std::runtime_error("CUDA error"); } @@ -266,14 +266,14 @@ static HostEndpointCandidate selectBestHostEndpoint( std::string gpu_pci = getGpuPciAddr(gpu_id); if (gpu_pci.empty()) { - NIXL_DEBUG << "[DocaDmaProxy] Unable to resolve GPU " << gpu_id + NIXL_DEBUG << "[BlueCache] Unable to resolve GPU " << gpu_id << " PCI bus id, falling back to first host_pci candidate"; return candidates.front(); } std::vector gpu_ancestry = buildPciAncestry(gpu_pci); if (gpu_ancestry.empty()) { - NIXL_DEBUG << "[DocaDmaProxy] Unable to build GPU PCI ancestry for " + NIXL_DEBUG << "[BlueCache] Unable to build GPU PCI ancestry for " << gpu_pci << ", falling back to first host_pci candidate"; return candidates.front(); } @@ -285,14 +285,14 @@ static HostEndpointCandidate selectBestHostEndpoint( std::vector candidate_ancestry = buildPciAncestry(candidate.pci_addr); if (candidate_ancestry.empty()) { - NIXL_DEBUG << "[DocaDmaProxy] host_pci candidate " << candidate.pci_addr + NIXL_DEBUG << "[BlueCache] host_pci candidate " << candidate.pci_addr << " has no sysfs ancestry, skipping affinity scoring"; continue; } int distance = computePciTreeDistance(gpu_ancestry, candidate_ancestry); size_t lca_depth = findCommonPrefixDepth(gpu_ancestry, candidate_ancestry); - NIXL_DEBUG << "[DocaDmaProxy] host_pci candidate " << candidate.pci_addr + NIXL_DEBUG << "[BlueCache] host_pci candidate " << candidate.pci_addr << " tree_distance=" << distance << " lca_depth=" << lca_depth << " gpu_pci=" << gpu_pci; @@ -309,7 +309,7 @@ static HostEndpointCandidate selectBestHostEndpoint( } if (!found_affinity_match) { - NIXL_DEBUG << "[DocaDmaProxy] No host_pci candidate had a usable PCIe tree " + NIXL_DEBUG << "[BlueCache] No host_pci candidate had a usable PCIe tree " << "distance for gpu_pci=" << gpu_pci << ", falling back to first candidate " << candidates.front().pci_addr; @@ -323,9 +323,9 @@ static HostEndpointCandidate selectBestHostEndpoint( // Request handle: background worker // --------------------------------------------------------------------------- void -nixlDocaDmaProxyReqH::executeTransfer(struct ctrl_channel *ch, struct doca_dev * /*dev*/) { +nixlBlueCacheReqH::executeTransfer(struct ctrl_channel *ch, struct doca_dev * /*dev*/) { state.store(IN_PROGRESS); - NIXL_DEBUG << "[DocaDmaProxy] [xfer] Worker thread started, op=" + NIXL_DEBUG << "[BlueCache] [xfer] Worker thread started, op=" << (operation == NIXL_WRITE ? "WRITE" : "READ") << " segments=" << segments.size(); @@ -345,7 +345,7 @@ nixlDocaDmaProxyReqH::executeTransfer(struct ctrl_channel *ch, struct doca_dev * } const size_t max_batch = std::min(static_cast(DMA_BATCH_MAX_SEGMENTS), max_batch_from_msg); - NIXL_DEBUG << "[DocaDmaProxy] [xfer] Control channel max_msg_size=" << max_msg_size + NIXL_DEBUG << "[BlueCache] [xfer] Control channel max_msg_size=" << max_msg_size << " header_size=" << header_size << " seg_size=" << sizeof(dma_batch_segment_t) << " effective_max_batch=" << max_batch; @@ -388,13 +388,13 @@ nixlDocaDmaProxyReqH::executeTransfer(struct ctrl_channel *ch, struct doca_dev * size_t send_size = offsetof(dma_batch_request_t, segments) + batch_size * sizeof(dma_batch_segment_t); - NIXL_DEBUG << "[DocaDmaProxy] [xfer] Sending batch request segments=[" + NIXL_DEBUG << "[BlueCache] [xfer] Sending batch request segments=[" << seg_idx << "-" << (seg_idx + batch_size - 1) << "] payload_size=" << send_size; doca_error_t send_ret = ctrl_channel_send(ch, &batch_req, static_cast(send_size)); if (send_ret != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] [xfer] ctrl_channel_send failed: " + NIXL_ERROR << "[BlueCache] [xfer] ctrl_channel_send failed: " << doca_error_get_descr(send_ret); overall_success = false; break; @@ -405,37 +405,37 @@ nixlDocaDmaProxyReqH::executeTransfer(struct ctrl_channel *ch, struct doca_dev * uint32_t msg_len = 0; doca_error_t recv_ret = ctrl_channel_wait_for_message(ch, &resp, sizeof(resp), &msg_len); if (recv_ret != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] [xfer] ctrl_channel_wait_for_message failed: " + NIXL_ERROR << "[BlueCache] [xfer] ctrl_channel_wait_for_message failed: " << doca_error_get_descr(recv_ret); overall_success = false; break; } if (msg_len != sizeof(resp)) { - NIXL_ERROR << "[DocaDmaProxy] [xfer] Response size mismatch: got " + NIXL_ERROR << "[BlueCache] [xfer] Response size mismatch: got " << msg_len << " expected " << sizeof(resp); overall_success = false; break; } if (resp.magic != DMA_TRANSFER_MAGIC || resp.version != DMA_TRANSFER_VERSION) { - NIXL_ERROR << "[DocaDmaProxy] [xfer] Response magic/version mismatch"; + NIXL_ERROR << "[BlueCache] [xfer] Response magic/version mismatch"; overall_success = false; break; } if (resp.status != 0) { - NIXL_ERROR << "[DocaDmaProxy] [xfer] Batch response error: status=" << resp.status + NIXL_ERROR << "[BlueCache] [xfer] Batch response error: status=" << resp.status << " error_code=" << resp.error_code; overall_success = false; break; } - NIXL_DEBUG << "[DocaDmaProxy] [xfer] Batch completed, transferred=" << resp.transfer_size_bytes + NIXL_DEBUG << "[BlueCache] [xfer] Batch completed, transferred=" << resp.transfer_size_bytes << " bytes, dma=" << resp.dma_seconds << "s, io=" << resp.io_seconds << "s"; seg_idx += batch_size; } if (overall_success) { - NIXL_DEBUG << "[DocaDmaProxy] [xfer] Transfer completed successfully"; + NIXL_DEBUG << "[BlueCache] [xfer] Transfer completed successfully"; state.store(COMPLETED); } else { errorCode = -1; @@ -446,7 +446,7 @@ nixlDocaDmaProxyReqH::executeTransfer(struct ctrl_channel *ch, struct doca_dev * // --------------------------------------------------------------------------- // Engine // --------------------------------------------------------------------------- -nixlDocaDmaProxyEngine::nixlDocaDmaProxyEngine(const nixlBackendInitParams *init_params) +nixlBlueCacheEngine::nixlBlueCacheEngine(const nixlBackendInitParams *init_params) : nixlBackendEngine(init_params), ch_(nullptr), dev_(nullptr) { @@ -485,7 +485,7 @@ nixlDocaDmaProxyEngine::nixlDocaDmaProxyEngine(const nixlBackendInitParams *init HostEndpointCandidate selected_endpoint = selectBestHostEndpoint(host_pci_candidates, gpuId_); - std::string endpoint_info = "[DocaDmaProxy] host_pci candidates=" + hostPci_ + std::string endpoint_info = "[BlueCache] host_pci candidates=" + hostPci_ + " selected=" + selected_endpoint.pci_addr + " for gpu_id=" + std::to_string(gpuId_); if (!selected_endpoint.dpu_host.empty()) { @@ -511,11 +511,11 @@ nixlDocaDmaProxyEngine::nixlDocaDmaProxyEngine(const nixlBackendInitParams *init CUDA_CHECK(cudaSetDevice(gpuId_)); cudaDeviceProp prop; CUDA_CHECK(cudaGetDeviceProperties(&prop, gpuId_)); - NIXL_INFO << "[DocaDmaProxy] Plugin version: " << DOCA_DMA_PROXY_PLUGIN_VERSION; - NIXL_INFO << "[DocaDmaProxy] Backend build timestamp: " << __DATE__ << " " << __TIME__; - NIXL_INFO << "[DocaDmaProxy] Using GPU " << gpuId_ << ": " << prop.name; + NIXL_INFO << "[BlueCache] Plugin version: " << BLUE_CACHE_PLUGIN_VERSION; + NIXL_INFO << "[BlueCache] Backend build timestamp: " << __DATE__ << " " << __TIME__; + NIXL_INFO << "[BlueCache] Using GPU " << gpuId_ << ": " << prop.name; } catch (const std::exception& e) { - NIXL_ERROR << "[DocaDmaProxy] CUDA init failed: " << e.what(); + NIXL_ERROR << "[BlueCache] CUDA init failed: " << e.what(); initErr = true; return; } @@ -523,73 +523,73 @@ nixlDocaDmaProxyEngine::nixlDocaDmaProxyEngine(const nixlBackendInitParams *init if (!hostPci_.empty()) { doca_error_t result = open_dma_device_by_pci(hostPci_.c_str(), &dev_); if (result != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] Failed to open DOCA device " << hostPci_ + NIXL_ERROR << "[BlueCache] Failed to open DOCA device " << hostPci_ << ": " << doca_error_get_descr(result); initErr = true; return; } } else { - NIXL_WARN << "[DocaDmaProxy] Warning: 'host_pci' not provided. VRAM_SEG registration will fail."; + NIXL_WARN << "[BlueCache] Warning: 'host_pci' not provided. VRAM_SEG registration will fail."; } doca_error_t result = DOCA_ERROR_UNKNOWN; if (ctrl_mode == "tcp") { if (dpu_host.empty()) { - NIXL_ERROR << "[DocaDmaProxy] Error: ctrl_mode=tcp requires 'dpu_host'"; + NIXL_ERROR << "[BlueCache] Error: ctrl_mode=tcp requires 'dpu_host'"; initErr = true; return; } result = ctrl_channel_tcp_client_create(dpu_host.c_str(), port, &ch_); if (result != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] Failed to create TCP client: " + NIXL_ERROR << "[BlueCache] Failed to create TCP client: " << doca_error_get_descr(result); initErr = true; return; } - NIXL_INFO << "[DocaDmaProxy] TCP client connected to " << dpu_host << ":" << port; + NIXL_INFO << "[BlueCache] TCP client connected to " << dpu_host << ":" << port; } else if (ctrl_mode == "comch") { if (hostPci_.empty()) { - NIXL_ERROR << "[DocaDmaProxy] Error: ctrl_mode=comch requires 'host_pci'"; + NIXL_ERROR << "[BlueCache] Error: ctrl_mode=comch requires 'host_pci'"; initErr = true; return; } result = ctrl_channel_comch_client_create(serviceName_.c_str(), hostPci_.c_str(), &ch_); if (result != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] Failed to create COMCH client: " + NIXL_ERROR << "[BlueCache] Failed to create COMCH client: " << doca_error_get_descr(result); initErr = true; return; } - NIXL_INFO << "[DocaDmaProxy] COMCH client connected to service '" + NIXL_INFO << "[BlueCache] COMCH client connected to service '" << serviceName_ << "'"; } else { if (!hostPci_.empty()) { result = ctrl_channel_comch_client_create(serviceName_.c_str(), hostPci_.c_str(), &ch_); if (result == DOCA_SUCCESS) { - NIXL_INFO << "[DocaDmaProxy] COMCH client connected to service '" + NIXL_INFO << "[BlueCache] COMCH client connected to service '" << serviceName_ << "'"; } else { - NIXL_ERROR << "[DocaDmaProxy] COMCH client failed: " + NIXL_ERROR << "[BlueCache] COMCH client failed: " << doca_error_get_descr(result); } } if (ch_ == nullptr && !dpu_host.empty()) { - NIXL_ERROR << "[DocaDmaProxy] Falling back to TCP ..."; + NIXL_ERROR << "[BlueCache] Falling back to TCP ..."; result = ctrl_channel_tcp_client_create(dpu_host.c_str(), port, &ch_); if (result == DOCA_SUCCESS) { - NIXL_INFO << "[DocaDmaProxy] TCP client connected to " << dpu_host << ":" << port; + NIXL_INFO << "[BlueCache] TCP client connected to " << dpu_host << ":" << port; } else { - NIXL_ERROR << "[DocaDmaProxy] TCP fallback also failed: " + NIXL_ERROR << "[BlueCache] TCP fallback also failed: " << doca_error_get_descr(result); } } if (ch_ == nullptr) { - NIXL_ERROR << "[DocaDmaProxy] Error: neither COMCH nor TCP could be established. " + NIXL_ERROR << "[BlueCache] Error: neither COMCH nor TCP could be established. " << "Provide 'host_pci' for COMCH or 'dpu_host' (and optionally 'port') for TCP."; initErr = true; return; @@ -598,26 +598,26 @@ nixlDocaDmaProxyEngine::nixlDocaDmaProxyEngine(const nixlBackendInitParams *init result = ctrl_channel_wait_for_connection(ch_); if (result != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] wait_for_connection failed: " + NIXL_ERROR << "[BlueCache] wait_for_connection failed: " << doca_error_get_descr(result); initErr = true; return; } - NIXL_INFO << "[DocaDmaProxy] Control channel ready"; + NIXL_INFO << "[BlueCache] Control channel ready"; } -nixlDocaDmaProxyEngine::~nixlDocaDmaProxyEngine() { +nixlBlueCacheEngine::~nixlBlueCacheEngine() { if (ch_) ctrl_channel_destroy(ch_); if (dev_) doca_dev_close(dev_); } nixl_status_t -nixlDocaDmaProxyEngine::registerMem(const nixlBlobDesc &mem, +nixlBlueCacheEngine::registerMem(const nixlBlobDesc &mem, const nixl_mem_t &nixl_mem, nixlBackendMD *&out) { std::lock_guard lock(mutex_); - NIXL_DEBUG << "[DocaDmaProxy] [" << DOCA_DMA_PROXY_PLUGIN_VERSION << "] registerMem called for mem type=" + NIXL_DEBUG << "[BlueCache] [" << BLUE_CACHE_PLUGIN_VERSION << "] registerMem called for mem type=" << static_cast(nixl_mem) << " addr=" << mem.addr << " len=" << mem.len << " metaInfo=" << mem.metaInfo @@ -625,11 +625,11 @@ nixlDocaDmaProxyEngine::registerMem(const nixlBlobDesc &mem, if (nixl_mem == VRAM_SEG) { if (mem.len == 0) { - NIXL_ERROR << "[DocaDmaProxy] registerMem failed: len == 0"; + NIXL_ERROR << "[BlueCache] registerMem failed: len == 0"; return NIXL_ERR_INVALID_PARAM; } if (!dev_) { - NIXL_ERROR << "[DocaDmaProxy] Cannot register VRAM without DOCA device. " + NIXL_ERROR << "[BlueCache] Cannot register VRAM without DOCA device. " << "Provide 'host_pci' in backend params."; return NIXL_ERR_BACKEND; } @@ -642,25 +642,25 @@ nixlDocaDmaProxyEngine::registerMem(const nixlBlobDesc &mem, uint32_t permissions = DOCA_ACCESS_FLAG_PCI_READ_WRITE; doca_error_t result = doca_mmap_create(&mmap); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_create => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_create => " << doca_error_get_descr(result); if (result == DOCA_SUCCESS) { result = doca_mmap_add_dev(mmap, dev_); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_add_dev => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_add_dev => " << doca_error_get_descr(result); } if (result == DOCA_SUCCESS) { result = doca_mmap_set_memrange(mmap, gpuPtr, mem.len); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_set_memrange(ptr=" << gpuPtr << ", len=" << mem.len << ") => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_set_memrange(ptr=" << gpuPtr << ", len=" << mem.len << ") => " << doca_error_get_descr(result); } if (result == DOCA_SUCCESS) { result = doca_mmap_set_permissions(mmap, permissions); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_set_permissions(READ_WRITE) => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_set_permissions(READ_WRITE) => " << doca_error_get_descr(result); } if (result == DOCA_SUCCESS) { result = doca_mmap_start(mmap); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_start(READ_WRITE) => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_start(READ_WRITE) => " << doca_error_get_descr(result); } if (result != DOCA_SUCCESS && mmap) { - NIXL_DEBUG << "[DocaDmaProxy] READ_WRITE failed, trying READ_ONLY fallback..."; + NIXL_DEBUG << "[BlueCache] READ_WRITE failed, trying READ_ONLY fallback..."; doca_mmap_destroy(mmap); mmap = nullptr; result = doca_mmap_create(&mmap); @@ -669,31 +669,31 @@ nixlDocaDmaProxyEngine::registerMem(const nixlBlobDesc &mem, if (result == DOCA_SUCCESS) { permissions = DOCA_ACCESS_FLAG_PCI_READ_ONLY; result = doca_mmap_set_permissions(mmap, permissions); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_set_permissions(READ_ONLY) => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_set_permissions(READ_ONLY) => " << doca_error_get_descr(result); } if (result == DOCA_SUCCESS) { result = doca_mmap_start(mmap); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_start(READ_ONLY) => " << doca_error_get_descr(result); + NIXL_DEBUG << "[BlueCache] doca_mmap_start(READ_ONLY) => " << doca_error_get_descr(result); } } if (result == DOCA_SUCCESS) { result = doca_mmap_export_pci(mmap, dev_, &desc, &desc_len); - NIXL_DEBUG << "[DocaDmaProxy] doca_mmap_export_pci => " << doca_error_get_descr(result) << ", desc_len=" << desc_len; + NIXL_DEBUG << "[BlueCache] doca_mmap_export_pci => " << doca_error_get_descr(result) << ", desc_len=" << desc_len; } if (result != DOCA_SUCCESS) { - NIXL_ERROR << "[DocaDmaProxy] registerMem DOCA pipeline failed, final error: " << doca_error_get_descr(result); + NIXL_ERROR << "[BlueCache] registerMem DOCA pipeline failed, final error: " << doca_error_get_descr(result); if (mmap) doca_mmap_destroy(mmap); return NIXL_ERR_BACKEND; } if (desc_len > DMA_EXPORT_DESC_MAX) { - NIXL_ERROR << "[DocaDmaProxy] desc_len " << desc_len << " exceeds max " << DMA_EXPORT_DESC_MAX; + NIXL_ERROR << "[BlueCache] desc_len " << desc_len << " exceeds max " << DMA_EXPORT_DESC_MAX; doca_mmap_destroy(mmap); return NIXL_ERR_BACKEND; } - NIXL_INFO << "[DocaDmaProxy] [" << DOCA_DMA_PROXY_PLUGIN_VERSION << "] registerMem VRAM success, gpuId=" << gpuId << " desc_len=" << desc_len; - out = new nixlDocaDmaProxyMD(mmap, desc, desc_len, gpuPtr, mem.len, gpuId); + NIXL_INFO << "[BlueCache] [" << BLUE_CACHE_PLUGIN_VERSION << "] registerMem VRAM success, gpuId=" << gpuId << " desc_len=" << desc_len; + out = new nixlBlueCacheMD(mmap, desc, desc_len, gpuPtr, mem.len, gpuId); return NIXL_SUCCESS; } @@ -713,7 +713,7 @@ nixlDocaDmaProxyEngine::registerMem(const nixlBlobDesc &mem, if (nixl_mem == OBJ_SEG && !key.empty()) { knownKeys_.insert(key); } - out = new nixlDocaDmaProxyFileMD(key, size); + out = new nixlBlueCacheFileMD(key, size); return NIXL_SUCCESS; } @@ -721,16 +721,16 @@ nixlDocaDmaProxyEngine::registerMem(const nixlBlobDesc &mem, } nixl_status_t -nixlDocaDmaProxyEngine::deregisterMem(nixlBackendMD *meta) { +nixlBlueCacheEngine::deregisterMem(nixlBackendMD *meta) { std::lock_guard lock(mutex_); if (!meta) return NIXL_ERR_INVALID_PARAM; - nixlDocaDmaProxyMD *md = dynamic_cast(meta); + nixlBlueCacheMD *md = dynamic_cast(meta); if (md && md->mmap) { NIXL_DEBUG << "[Doca-DEREG] VRAM md=" << meta; doca_mmap_destroy(md->mmap); } else { - nixlDocaDmaProxyFileMD *file_md = dynamic_cast(meta); + nixlBlueCacheFileMD *file_md = dynamic_cast(meta); if (file_md) { NIXL_DEBUG << "[Doca-DEREG] FILE/OBJ md=" << meta << " path=[" << file_md->path << "]" @@ -744,26 +744,26 @@ nixlDocaDmaProxyEngine::deregisterMem(nixlBackendMD *meta) { } nixl_status_t -nixlDocaDmaProxyEngine::connect(const std::string & /*remote_agent*/) { +nixlBlueCacheEngine::connect(const std::string & /*remote_agent*/) { std::lock_guard lock(mutex_); if (!ch_) return NIXL_ERR_BACKEND; return NIXL_SUCCESS; } nixl_status_t -nixlDocaDmaProxyEngine::disconnect(const std::string & /*remote_agent*/) { +nixlBlueCacheEngine::disconnect(const std::string & /*remote_agent*/) { return NIXL_SUCCESS; } nixl_status_t -nixlDocaDmaProxyEngine::getPublicData(const nixlBackendMD *meta, std::string &str) const { - const nixlDocaDmaProxyMD *md = dynamic_cast(meta); +nixlBlueCacheEngine::getPublicData(const nixlBackendMD *meta, std::string &str) const { + const nixlBlueCacheMD *md = dynamic_cast(meta); if (md) { str.assign(reinterpret_cast(md->exportDesc.data()), md->exportDesc.size()); return NIXL_SUCCESS; } - const nixlDocaDmaProxyFileMD *file_md = dynamic_cast(meta); + const nixlBlueCacheFileMD *file_md = dynamic_cast(meta); if (file_md) { str.clear(); return NIXL_SUCCESS; @@ -772,19 +772,19 @@ nixlDocaDmaProxyEngine::getPublicData(const nixlBackendMD *meta, std::string &st } nixl_status_t -nixlDocaDmaProxyEngine::getConnInfo(std::string &str) const { +nixlBlueCacheEngine::getConnInfo(std::string &str) const { str = serviceName_; return NIXL_SUCCESS; } nixl_status_t -nixlDocaDmaProxyEngine::loadRemoteConnInfo(const std::string & /*remote_agent*/, +nixlBlueCacheEngine::loadRemoteConnInfo(const std::string & /*remote_agent*/, const std::string & /*remote_conn_info*/) { return NIXL_SUCCESS; } nixl_status_t -nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, +nixlBlueCacheEngine::prepXfer(const nixl_xfer_op_t &operation, const nixl_meta_dlist_t &local, const nixl_meta_dlist_t &remote, const std::string & /*remote_agent*/, @@ -792,20 +792,20 @@ nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, const nixl_opt_b_args_t * /*opt_args*/) const { std::lock_guard lock(mutex_); - NIXL_DEBUG << "[DocaDmaProxy] prepXfer op=" << operation + NIXL_DEBUG << "[BlueCache] prepXfer op=" << operation << " local.count=" << local.descCount() << " remote.count=" << remote.descCount(); if (operation != NIXL_READ && operation != NIXL_WRITE) { - NIXL_ERROR << "[DocaDmaProxy] prepXfer failed: invalid operation"; + NIXL_ERROR << "[BlueCache] prepXfer failed: invalid operation"; return NIXL_ERR_INVALID_PARAM; } if (local.descCount() == 0 || remote.descCount() == 0) { - NIXL_ERROR << "[DocaDmaProxy] prepXfer failed: empty descriptor list"; + NIXL_ERROR << "[BlueCache] prepXfer failed: empty descriptor list"; return NIXL_ERR_INVALID_PARAM; } if (local.descCount() != remote.descCount()) { - NIXL_ERROR << "[DocaDmaProxy] prepXfer failed: local and remote descriptor counts mismatch"; + NIXL_ERROR << "[BlueCache] prepXfer failed: local and remote descriptor counts mismatch"; return NIXL_ERR_INVALID_PARAM; } @@ -813,26 +813,26 @@ nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, nixl_mem_t remote_type = remote.getType(); if (local_type != VRAM_SEG || remote_type != OBJ_SEG) { - NIXL_ERROR << "[DocaDmaProxy] Transfer requires VRAM_SEG <-> OBJ_SEG"; + NIXL_ERROR << "[BlueCache] Transfer requires VRAM_SEG <-> OBJ_SEG"; return NIXL_ERR_INVALID_PARAM; } - auto req = new nixlDocaDmaProxyReqH(operation, hostPci_); + auto req = new nixlBlueCacheReqH(operation, hostPci_); auto local_it = local.begin(); auto remote_it = remote.begin(); size_t seg_idx = 0; for (; local_it != local.end() && remote_it != remote.end(); ++local_it, ++remote_it, ++seg_idx) { - nixlDocaDmaProxyMD *md = static_cast(local_it->metadataP); + nixlBlueCacheMD *md = static_cast(local_it->metadataP); if (!md) { - NIXL_ERROR << "[DocaDmaProxy] Invalid local VRAM metadata at segment " << seg_idx; + NIXL_ERROR << "[BlueCache] Invalid local VRAM metadata at segment " << seg_idx; delete req; return NIXL_ERR_INVALID_PARAM; } uint64_t remoteAddr = local_it->addr; - NIXL_DEBUG << "[DocaDmaProxy] [prepXfer] segment[" << seg_idx << "] local_addr=0x" + NIXL_DEBUG << "[BlueCache] [prepXfer] segment[" << seg_idx << "] local_addr=0x" << std::hex << local_it->addr << std::dec << " local_len=" << local_it->len << " remoteAddr=0x" << std::hex << remoteAddr << std::dec @@ -841,10 +841,10 @@ nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, << " remote.addr=" << remote_it->addr << " remote.len=" << remote_it->len << " remote.devId=" << remote_it->devId; - nixlDocaDmaProxyFileMD *file_md = - static_cast(remote_it->metadataP); + nixlBlueCacheFileMD *file_md = + static_cast(remote_it->metadataP); if (!file_md) { - NIXL_ERROR << "[DocaDmaProxy] Invalid remote FILE/OBJ metadata"; + NIXL_ERROR << "[BlueCache] Invalid remote FILE/OBJ metadata"; delete req; return NIXL_ERR_INVALID_PARAM; } @@ -854,7 +854,7 @@ nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, std::string dpuPath = file_md->path; if (dpuPath.empty()) { - NIXL_ERROR << "[DocaDmaProxy] DPU file path is empty"; + NIXL_ERROR << "[BlueCache] DPU file path is empty"; delete req; return NIXL_ERR_INVALID_PARAM; } @@ -868,14 +868,14 @@ nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, if (operation == NIXL_WRITE) { if (xferLen == 0) { - NIXL_ERROR << "[DocaDmaProxy] WRITE requires non-zero transfer length"; + NIXL_ERROR << "[BlueCache] WRITE requires non-zero transfer length"; delete req; return NIXL_ERR_INVALID_PARAM; } uint64_t end_addr = remoteAddr + xferLen; uint64_t gpu_end = reinterpret_cast(md->gpuPtr) + md->len; if (end_addr > gpu_end) { - NIXL_ERROR << "[DocaDmaProxy] WRITE exceeds GPU buffer bounds: " + NIXL_ERROR << "[BlueCache] WRITE exceeds GPU buffer bounds: " << "end_addr=" << end_addr << " gpu_end=" << gpu_end << " gpuPtr=" << md->gpuPtr @@ -889,13 +889,13 @@ nixlDocaDmaProxyEngine::prepXfer(const nixl_xfer_op_t &operation, req->addSegment(remoteAddr, xferLen, dpuPath, md->exportDesc); } - NIXL_DEBUG << "[DocaDmaProxy] Prepared batch transfer with " << req->segments.size() << " segments"; + NIXL_DEBUG << "[BlueCache] Prepared batch transfer with " << req->segments.size() << " segments"; handle = req; return NIXL_SUCCESS; } nixl_status_t -nixlDocaDmaProxyEngine::postXfer(const nixl_xfer_op_t & /*operation*/, +nixlBlueCacheEngine::postXfer(const nixl_xfer_op_t & /*operation*/, const nixl_meta_dlist_t & /*local*/, const nixl_meta_dlist_t & /*remote*/, const std::string & /*remote_agent*/, @@ -903,36 +903,36 @@ nixlDocaDmaProxyEngine::postXfer(const nixl_xfer_op_t & /*operation*/, const nixl_opt_b_args_t * /*opt_args*/) const { std::lock_guard lock(mutex_); - NIXL_DEBUG << "[DocaDmaProxy] postXfer handle=" << handle; + NIXL_DEBUG << "[BlueCache] postXfer handle=" << handle; if (!handle) return NIXL_ERR_INVALID_PARAM; - nixlDocaDmaProxyReqH *req = static_cast(handle); - if (req->state != nixlDocaDmaProxyReqH::PREPARED) { + nixlBlueCacheReqH *req = static_cast(handle); + if (req->state != nixlBlueCacheReqH::PREPARED) { return NIXL_ERR_REPOST_ACTIVE; } if (!ch_) return NIXL_ERR_BACKEND; - req->worker = std::thread(&nixlDocaDmaProxyReqH::executeTransfer, req, ch_, dev_); + req->worker = std::thread(&nixlBlueCacheReqH::executeTransfer, req, ch_, dev_); return NIXL_IN_PROG; } nixl_status_t -nixlDocaDmaProxyEngine::checkXfer(nixlBackendReqH *handle) const { +nixlBlueCacheEngine::checkXfer(nixlBackendReqH *handle) const { if (!handle) return NIXL_ERR_INVALID_PARAM; - nixlDocaDmaProxyReqH *req = static_cast(handle); - nixlDocaDmaProxyReqH::State s = req->state.load(); + nixlBlueCacheReqH *req = static_cast(handle); + nixlBlueCacheReqH::State s = req->state.load(); - if (s == nixlDocaDmaProxyReqH::COMPLETED) { + if (s == nixlBlueCacheReqH::COMPLETED) { if (req->worker.joinable()) req->worker.join(); return NIXL_SUCCESS; - } else if (s == nixlDocaDmaProxyReqH::ERROR) { + } else if (s == nixlBlueCacheReqH::ERROR) { if (req->worker.joinable()) req->worker.join(); return NIXL_ERR_BACKEND; - } else if (s == nixlDocaDmaProxyReqH::IN_PROGRESS) { + } else if (s == nixlBlueCacheReqH::IN_PROGRESS) { return NIXL_IN_PROG; } @@ -940,7 +940,7 @@ nixlDocaDmaProxyEngine::checkXfer(nixlBackendReqH *handle) const { } nixl_status_t -nixlDocaDmaProxyEngine::queryMem(const nixl_reg_dlist_t &descs, +nixlBlueCacheEngine::queryMem(const nixl_reg_dlist_t &descs, std::vector &resp) const { std::lock_guard lock(mutex_); for (auto it = descs.begin(); it != descs.end(); ++it) { @@ -951,10 +951,10 @@ nixlDocaDmaProxyEngine::queryMem(const nixl_reg_dlist_t &descs, } nixl_status_t -nixlDocaDmaProxyEngine::releaseReqH(nixlBackendReqH *handle) const { +nixlBlueCacheEngine::releaseReqH(nixlBackendReqH *handle) const { if (!handle) return NIXL_SUCCESS; - nixlDocaDmaProxyReqH *req = static_cast(handle); + nixlBlueCacheReqH *req = static_cast(handle); if (req->worker.joinable()) req->worker.join(); delete req; return NIXL_SUCCESS; diff --git a/nixl-plugin/src/doca_dma_proxy_backend.h b/nixl-plugin/src/blue_cache_backend.h similarity index 86% rename from nixl-plugin/src/doca_dma_proxy_backend.h rename to nixl-plugin/src/blue_cache_backend.h index ca56b55..617dc1c 100644 --- a/nixl-plugin/src/doca_dma_proxy_backend.h +++ b/nixl-plugin/src/blue_cache_backend.h @@ -3,10 +3,10 @@ * SPDX-License-Identifier: Apache-2.0 */ -#ifndef NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DOCA_DMA_PROXY_BACKEND_H -#define NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DOCA_DMA_PROXY_BACKEND_H +#ifndef NIXL_SRC_PLUGINS_BLUE_CACHE_BLUE_CACHE_BACKEND_H +#define NIXL_SRC_PLUGINS_BLUE_CACHE_BLUE_CACHE_BACKEND_H -#define DOCA_DMA_PROXY_PLUGIN_VERSION "0.7.0" +#define BLUE_CACHE_PLUGIN_VERSION "0.7.0" #include #include @@ -29,7 +29,7 @@ struct ctrl_channel; struct doca_dev; struct doca_mmap; -class nixlDocaDmaProxyMD : public nixlBackendMD { +class nixlBlueCacheMD : public nixlBackendMD { public: struct doca_mmap* mmap; std::vector exportDesc; @@ -37,7 +37,7 @@ class nixlDocaDmaProxyMD : public nixlBackendMD { size_t len; int gpuId; - nixlDocaDmaProxyMD(struct doca_mmap *m, + nixlBlueCacheMD(struct doca_mmap *m, const void *desc, size_t descLen, void *ptr, @@ -52,18 +52,18 @@ class nixlDocaDmaProxyMD : public nixlBackendMD { gpuId(g) {} }; -class nixlDocaDmaProxyFileMD : public nixlBackendMD { +class nixlBlueCacheFileMD : public nixlBackendMD { public: std::string path; size_t size; - nixlDocaDmaProxyFileMD(const std::string &p, size_t s) + nixlBlueCacheFileMD(const std::string &p, size_t s) : nixlBackendMD(false), path(p), size(s) {} }; -class nixlDocaDmaProxyReqH : public nixlBackendReqH { +class nixlBlueCacheReqH : public nixlBackendReqH { public: enum State { PREPARED, IN_PROGRESS, COMPLETED, ERROR }; @@ -83,7 +83,7 @@ class nixlDocaDmaProxyReqH : public nixlBackendReqH { int errorCode; dma_transfer_response_t response; - nixlDocaDmaProxyReqH(const nixl_xfer_op_t &op, const std::string &host_pci) + nixlBlueCacheReqH(const nixl_xfer_op_t &op, const std::string &host_pci) : operation(op), hostPci(host_pci), state(PREPARED), @@ -96,7 +96,7 @@ class nixlDocaDmaProxyReqH : public nixlBackendReqH { segments.push_back({raddr, xlen, path, desc}); } - ~nixlDocaDmaProxyReqH() { + ~nixlBlueCacheReqH() { if (worker.joinable()) worker.join(); } @@ -104,10 +104,10 @@ class nixlDocaDmaProxyReqH : public nixlBackendReqH { struct doca_dev* dev); }; -class nixlDocaDmaProxyEngine : public nixlBackendEngine { +class nixlBlueCacheEngine : public nixlBackendEngine { public: - explicit nixlDocaDmaProxyEngine(const nixlBackendInitParams *init_params); - ~nixlDocaDmaProxyEngine(); + explicit nixlBlueCacheEngine(const nixlBackendInitParams *init_params); + ~nixlBlueCacheEngine(); bool supportsRemote() const override { return false; } bool supportsLocal() const override { return true; } diff --git a/nixl-plugin/src/doca_dma_proxy_plugin.cpp b/nixl-plugin/src/blue_cache_plugin.cpp similarity index 52% rename from nixl-plugin/src/doca_dma_proxy_plugin.cpp rename to nixl-plugin/src/blue_cache_plugin.cpp index 628f723..d5be4e1 100644 --- a/nixl-plugin/src/doca_dma_proxy_plugin.cpp +++ b/nixl-plugin/src/blue_cache_plugin.cpp @@ -3,18 +3,18 @@ * SPDX-License-Identifier: Apache-2.0 */ -#include "doca_dma_proxy_backend.h" +#include "blue_cache_backend.h" #include "backend/backend_plugin.h" -using doca_dma_proxy_plugin_t = nixlBackendPluginCreator; +using blue_cache_plugin_t = nixlBackendPluginCreator; -#ifdef STATIC_PLUGIN_DOCA_DMA_PROXY +#ifdef STATIC_PLUGIN_BLUE_CACHE // Static plugin entry point nixlBackendPlugin* -createStaticDocaDmaProxyPlugin() { - return doca_dma_proxy_plugin_t::create(NIXL_PLUGIN_API_VERSION, - "DOCA_DMA_PROXY", - DOCA_DMA_PROXY_PLUGIN_VERSION, +createStaticBlueCachePlugin() { + return blue_cache_plugin_t::create(NIXL_PLUGIN_API_VERSION, + "BLUE_CACHE", + BLUE_CACHE_PLUGIN_VERSION, {}, {VRAM_SEG, OBJ_SEG}); } @@ -22,9 +22,9 @@ createStaticDocaDmaProxyPlugin() { // Dynamic plugin entry points extern "C" NIXL_PLUGIN_EXPORT nixlBackendPlugin* nixl_plugin_init() { - return doca_dma_proxy_plugin_t::create(NIXL_PLUGIN_API_VERSION, - "DOCA_DMA_PROXY", - DOCA_DMA_PROXY_PLUGIN_VERSION, + return blue_cache_plugin_t::create(NIXL_PLUGIN_API_VERSION, + "BLUE_CACHE", + BLUE_CACHE_PLUGIN_VERSION, {}, {VRAM_SEG, OBJ_SEG}); } diff --git a/nixl-plugin/src/comch_ctrl.h b/nixl-plugin/src/comch_ctrl.h index 4e9798d..9b8fd8d 100644 --- a/nixl-plugin/src/comch_ctrl.h +++ b/nixl-plugin/src/comch_ctrl.h @@ -3,8 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -#ifndef NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_COMCH_CTRL_H -#define NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_COMCH_CTRL_H +#ifndef NIXL_SRC_PLUGINS_BLUE_CACHE_COMCH_CTRL_H +#define NIXL_SRC_PLUGINS_BLUE_CACHE_COMCH_CTRL_H #include @@ -51,4 +51,4 @@ doca_error_t comch_ctrl_wait_for_message(struct comch_ctrl *ctrl, } #endif -#endif // NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_COMCH_CTRL_H +#endif // NIXL_SRC_PLUGINS_BLUE_CACHE_COMCH_CTRL_H diff --git a/nixl-plugin/src/ctrl_channel.h b/nixl-plugin/src/ctrl_channel.h index c0bc378..1d17d0a 100644 --- a/nixl-plugin/src/ctrl_channel.h +++ b/nixl-plugin/src/ctrl_channel.h @@ -3,8 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -#ifndef NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_CTRL_CHANNEL_H -#define NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_CTRL_CHANNEL_H +#ifndef NIXL_SRC_PLUGINS_BLUE_CACHE_CTRL_CHANNEL_H +#define NIXL_SRC_PLUGINS_BLUE_CACHE_CTRL_CHANNEL_H #include @@ -73,4 +73,4 @@ doca_error_t ctrl_channel_progress(struct ctrl_channel *ch); } #endif -#endif // NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_CTRL_CHANNEL_H +#endif // NIXL_SRC_PLUGINS_BLUE_CACHE_CTRL_CHANNEL_H diff --git a/nixl-plugin/src/doca_device_utils.h b/nixl-plugin/src/doca_device_utils.h index 2cbe056..60fe12c 100644 --- a/nixl-plugin/src/doca_device_utils.h +++ b/nixl-plugin/src/doca_device_utils.h @@ -3,8 +3,8 @@ * SPDX-License-Identifier: Apache-2.0 */ -#ifndef NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DOCA_DEVICE_UTILS_H -#define NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DOCA_DEVICE_UTILS_H +#ifndef NIXL_SRC_PLUGINS_BLUE_CACHE_DOCA_DEVICE_UTILS_H +#define NIXL_SRC_PLUGINS_BLUE_CACHE_DOCA_DEVICE_UTILS_H #include @@ -29,4 +29,4 @@ doca_error_t open_dev_rep_by_pci(struct doca_dev *local, } #endif -#endif // NIXL_SRC_PLUGINS_DOCA_DMA_PROXY_DOCA_DEVICE_UTILS_H +#endif // NIXL_SRC_PLUGINS_BLUE_CACHE_DOCA_DEVICE_UTILS_H diff --git a/nixl-plugin/src/meson.build b/nixl-plugin/src/meson.build index 81e2feb..c4da626 100644 --- a/nixl-plugin/src/meson.build +++ b/nixl-plugin/src/meson.build @@ -13,36 +13,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -plugin_doca_dma_proxy_deps = [ +plugin_blue_cache_deps = [ doca_common_dep, doca_comch_dep, doca_dma_dep, cuda_dep, ] -doca_dma_proxy_sources = files( - 'doca_dma_proxy_backend.cpp', - 'doca_dma_proxy_plugin.cpp', +blue_cache_sources = files( + 'blue_cache_backend.cpp', + 'blue_cache_plugin.cpp', 'ctrl_channel.cpp', 'comch_ctrl.cpp', 'doca_device_utils.cpp', ) -doca_dma_proxy_inc = include_directories('.') +blue_cache_inc = include_directories('.') -if 'DOCA_DMA_PROXY' in static_plugins - doca_dma_proxy_lib = static_library('DOCA_DMA_PROXY', - doca_dma_proxy_sources, - dependencies: [nixl_infra, serdes_interface, plugin_doca_dma_proxy_deps, absl_log_dep], - include_directories: [nixl_inc_dirs, utils_inc_dirs, doca_dma_proxy_inc], +if 'BLUE_CACHE' in static_plugins + blue_cache_lib = static_library('BLUE_CACHE', + blue_cache_sources, + dependencies: [nixl_infra, serdes_interface, plugin_blue_cache_deps, absl_log_dep], + include_directories: [nixl_inc_dirs, utils_inc_dirs, blue_cache_inc], install: true, name_prefix: 'libplugin_', install_dir: plugin_install_dir) else - doca_dma_proxy_lib = shared_library('DOCA_DMA_PROXY', - doca_dma_proxy_sources, - dependencies: [nixl_infra, serdes_interface, plugin_doca_dma_proxy_deps, absl_log_dep], - include_directories: [nixl_inc_dirs, utils_inc_dirs, doca_dma_proxy_inc], + blue_cache_lib = shared_library('BLUE_CACHE', + blue_cache_sources, + dependencies: [nixl_infra, serdes_interface, plugin_blue_cache_deps, absl_log_dep], + include_directories: [nixl_inc_dirs, utils_inc_dirs, blue_cache_inc], install: true, cpp_args: ['-fPIC'], name_prefix: 'libplugin_', @@ -50,10 +50,10 @@ else if get_option('buildtype') == 'debug' run_command('sh', '-c', - 'echo "DOCA_DMA_PROXY=' + doca_dma_proxy_lib.full_path() + '" >> ' + plugin_build_dir + '/pluginlist', + 'echo "BLUE_CACHE=' + blue_cache_lib.full_path() + '" >> ' + plugin_build_dir + '/pluginlist', check: true ) endif endif -doca_dma_proxy_backend_interface = declare_dependency(link_with: doca_dma_proxy_lib) +blue_cache_backend_interface = declare_dependency(link_with: blue_cache_lib) diff --git a/scripts/build_all.sh b/scripts/build_all.sh index 39e4db3..c1c7f6d 100755 --- a/scripts/build_all.sh +++ b/scripts/build_all.sh @@ -30,9 +30,9 @@ if [ -n "$NIXL_SRC" ]; then "$SCRIPT_DIR/patch_nixl.sh" "$NIXL_SRC" echo "" - echo "=== Building NIXL with DOCA_DMA_PROXY plugin ===" + echo "=== Building NIXL with BLUE_CACHE plugin ===" if [ ! -d "$NIXL_SRC/build" ]; then - meson setup "$NIXL_SRC/build" -Denable_plugins=DOCA_DMA_PROXY "$NIXL_SRC" + meson setup "$NIXL_SRC/build" -Denable_plugins=BLUE_CACHE "$NIXL_SRC" fi ninja -C "$NIXL_SRC/build" echo "[OK] NIXL build complete" @@ -52,6 +52,6 @@ make -j"$(nproc)" echo "" echo "============================================================" echo "Build complete." -echo " DPU agent: $PROJECT_DIR/build/dpu-agent/dpu_dma_copy" +echo " DPU agent: $PROJECT_DIR/build/blue-cache/blue-cache" echo " Examples: $PROJECT_DIR/build/examples/cpp/" echo "============================================================" diff --git a/scripts/patch_nixl.sh b/scripts/patch_nixl.sh index 7e55c78..0405fbd 100755 --- a/scripts/patch_nixl.sh +++ b/scripts/patch_nixl.sh @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# Patch a NIXL source tree so it can build the DOCA_DMA_PROXY backend plugin. +# Patch a NIXL source tree so it can build the BLUE_CACHE backend plugin. # # Usage: # ./scripts/patch_nixl.sh /path/to/nixl/source @@ -26,7 +26,7 @@ if [ ! -d "$NIXL_SRC/src/plugins" ] || [ ! -f "$NIXL_SRC/meson.build" ]; then fi NIXL_SRC="$(cd "$NIXL_SRC" && pwd)" -PLUGIN_DIR="$NIXL_SRC/src/plugins/doca_dma_proxy" +PLUGIN_DIR="$NIXL_SRC/src/plugins/blue_cache" echo "============================================================" echo "Patching NIXL source tree: $NIXL_SRC" @@ -41,27 +41,27 @@ cp -L "$PROJECT_DIR/nixl-plugin/src/meson.build" "$PLUGIN_DIR/" cp -L "$PROJECT_DIR/common/include/dma_transfer.h" "$PLUGIN_DIR/" echo "[OK] Copied plugin source files to $PLUGIN_DIR" -# 2. Patch root meson.build to include DOCA_DMA_PROXY in all_plugins. +# 2. Patch root meson.build to include BLUE_CACHE in all_plugins. python3 - "$NIXL_SRC/meson.build" <<'PY' import sys path = sys.argv[1] with open(path, 'r') as f: text = f.read() -marker = "'DOCA_DMA_PROXY'" +marker = "'BLUE_CACHE'" if marker not in text: # Insert into the all_plugins list before the closing bracket. old = "all_plugins = ['UCX', 'LIBFABRIC', 'POSIX', 'OBJ', 'GDS', 'GDS_MT', 'MOONCAKE', 'HF3FS', 'GUSLI', 'GPUNETIO', 'UCCL', 'AZURE_BLOB'" - new = "all_plugins = ['UCX', 'LIBFABRIC', 'POSIX', 'OBJ', 'GDS', 'GDS_MT', 'MOONCAKE', 'HF3FS', 'GUSLI', 'GPUNETIO', 'UCCL', 'AZURE_BLOB', 'DOCA_DMA_PROXY'" + new = "all_plugins = ['UCX', 'LIBFABRIC', 'POSIX', 'OBJ', 'GDS', 'GDS_MT', 'MOONCAKE', 'HF3FS', 'GUSLI', 'GPUNETIO', 'UCCL', 'AZURE_BLOB', 'BLUE_CACHE'" if old in text: text = text.replace(old, new, 1) with open(path, 'w') as f: f.write(text) - print("[OK] Patched root meson.build: added DOCA_DMA_PROXY to all_plugins") + print("[OK] Patched root meson.build: added BLUE_CACHE to all_plugins") else: print("[WARN] Could not locate all_plugins list in root meson.build; manual check needed") else: - print("[OK] Root meson.build already contains DOCA_DMA_PROXY") + print("[OK] Root meson.build already contains BLUE_CACHE") PY # 3. Patch src/plugins/meson.build to probe DOCA deps and conditionally build the plugin. @@ -85,27 +85,27 @@ else: print("[OK] DOCA dependency probes already present in src/plugins/meson.build") plugin_block = """ -# DOCA DMA Proxy plugin for GPU <-> DPU transfers via DOCA Comch + DMA -if enabled_plugins.get('DOCA_DMA_PROXY') +# BlueCache (BLUE_CACHE) plugin for GPU <-> DPU transfers via DOCA Comch + DMA +if enabled_plugins.get('BLUE_CACHE') if (not cuda_dep.found() or not doca_common_dep.found() or not doca_comch_dep.found() or not doca_dma_dep.found()) and is_explicit_enable if not cuda_dep.found() - error('DOCA_DMA_PROXY plugin requested but CUDA dependency not found') + error('BLUE_CACHE plugin requested but CUDA dependency not found') else - error('DOCA_DMA_PROXY plugin requested but DOCA dependency not found') + error('BLUE_CACHE plugin requested but DOCA dependency not found') endif elif cuda_dep.found() and doca_common_dep.found() and doca_comch_dep.found() and doca_dma_dep.found() - subdir('doca_dma_proxy') + subdir('blue_cache') endif endif """ -if "DOCA DMA Proxy plugin for GPU <-> DPU transfers" not in text: +if "BlueCache (BLUE_CACHE) plugin for GPU <-> DPU transfers" not in text: text = text.rstrip() + "\n" + plugin_block with open(path, 'w') as f: f.write(text) - print("[OK] Added DOCA_DMA_PROXY conditional build to src/plugins/meson.build") + print("[OK] Added BLUE_CACHE conditional build to src/plugins/meson.build") else: - print("[OK] DOCA_DMA_PROXY conditional build already present in src/plugins/meson.build") + print("[OK] BLUE_CACHE conditional build already present in src/plugins/meson.build") PY # 4. Patch src/core/nixl_plugin_manager.cpp for static plugin registration. @@ -115,14 +115,14 @@ path = sys.argv[1] with open(path, 'r') as f: text = f.read() -marker = "STATIC_PLUGIN_DOCA_DMA_PROXY" +marker = "STATIC_PLUGIN_BLUE_CACHE" if marker in text: print("[OK] Static plugin registration already present in nixl_plugin_manager.cpp") else: # Insert before the telemetry buffer registration line. old = " NIXL_REGISTER_STATIC_PLUGIN(Telemetry, BUFFER)" - new = """#ifdef STATIC_PLUGIN_DOCA_DMA_PROXY - NIXL_REGISTER_STATIC_PLUGIN(Backend, DOCA_DMA_PROXY) + new = """#ifdef STATIC_PLUGIN_BLUE_CACHE + NIXL_REGISTER_STATIC_PLUGIN(Backend, BLUE_CACHE) #endif NIXL_REGISTER_STATIC_PLUGIN(Telemetry, BUFFER)""" @@ -130,7 +130,7 @@ else: text = text.replace(old, new, 1) with open(path, 'w') as f: f.write(text) - print("[OK] Patched nixl_plugin_manager.cpp: added DOCA_DMA_PROXY static registration") + print("[OK] Patched nixl_plugin_manager.cpp: added BLUE_CACHE static registration") else: print("[WARN] Could not locate static plugin registration block; manual check needed") PY @@ -140,6 +140,6 @@ echo "============================================================" echo "Patch complete." echo "Next steps:" echo " cd $NIXL_SRC" -echo " meson setup build -Denable_plugins=DOCA_DMA_PROXY" +echo " meson setup build -Denable_plugins=BLUE_CACHE" echo " ninja -C build" echo "============================================================"