From 330a60a51c0bc12f2b60196404d2425710d3d5be Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 30 Mar 2026 16:08:29 -0700 Subject: [PATCH 01/14] Use 1ES.Pool=diskann-github --- .github/workflows/ci.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 375c3307e..788b6dd26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -342,14 +342,7 @@ jobs: test-workspace: needs: basics name: test workspace - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: - - windows-latest - - ubuntu-latest - - ubuntu-24.04-arm - - macos-latest + runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] steps: - uses: actions/checkout@v4 From d42f85a5ed180aaf15605834136ac1ec1e36294a Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 12:40:58 -0700 Subject: [PATCH 02/14] Revert "Use 1ES.Pool=diskann-github" This reverts commit 330a60a51c0bc12f2b60196404d2425710d3d5be. --- .github/workflows/ci.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 788b6dd26..375c3307e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -342,7 +342,14 @@ jobs: test-workspace: needs: basics name: test workspace - runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - windows-latest + - ubuntu-latest + - ubuntu-24.04-arm + - macos-latest steps: - uses: actions/checkout@v4 From e2d7c789fccd440a7f376af4be431478c45d637d Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 12:45:44 -0700 Subject: [PATCH 03/14] Update benchmark workflows to use self-hosted runners for improved performance --- .github/workflows/disk-benchmarks-aa.yml | 2 +- .github/workflows/disk-benchmarks.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index 800acbdfb..d15ad629d 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -37,7 +37,7 @@ jobs: # A/A benchmark: run main vs main to detect environment noise. aa-benchmark: name: A/A - ${{ matrix.dataset }} - runs-on: ubuntu-latest + runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] timeout-minutes: 120 strategy: fail-fast: false diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index 919e74c15..63e3f7060 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -54,7 +54,7 @@ jobs: # Macro benchmark: compare current branch against baseline macro-benchmark: name: Macro Benchmark - ${{ matrix.dataset }} - runs-on: ubuntu-latest + runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] # TODO: For production benchmarks, consider using a self-hosted runner with: # - NVMe storage for consistent I/O performance # - CPU pinning (taskset) for reduced variance From 637067c8b58e29d620bd5085924e34561414c60d Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 13:10:43 -0700 Subject: [PATCH 04/14] Install Rustup --- .github/actions/setup-disk-benchmark/action.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index 19c6fcbaa..e7c63da44 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -23,7 +23,10 @@ runs: steps: - name: Install Rust shell: bash - run: rustup show + run: | + sudo apt-get update + sudo apt-get install -y rustup + rustup show - name: Install system dependencies shell: bash From db6ade499c95acb68bb4dffce3b5bac494deff1e Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 15:44:08 -0700 Subject: [PATCH 05/14] Install Rustup --- .github/actions/setup-disk-benchmark/action.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index e7c63da44..b98c1c1a3 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -24,8 +24,10 @@ runs: - name: Install Rust shell: bash run: | - sudo apt-get update - sudo apt-get install -y rustup + if ! command -v rustup &>/dev/null; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + fi rustup show - name: Install system dependencies From e7acfa004845cad988dd17c44fb3d07c86e53b07 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 17:39:48 -0700 Subject: [PATCH 06/14] Move to a separate step: rustup show --- .github/actions/setup-disk-benchmark/action.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index b98c1c1a3..f0db8d66b 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -28,7 +28,10 @@ runs: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" fi - rustup show + + - name: Show Rust toolchain + shell: bash + run: rustup show - name: Install system dependencies shell: bash From 699e4be25919547f40d4b8f232ba6e52b684773b Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 18:37:34 -0700 Subject: [PATCH 07/14] Mount SSD --- .../actions/setup-disk-benchmark/action.yml | 12 +++++++ .github/workflows/disk-benchmarks-aa.yml | 31 ++++++++++--------- .github/workflows/disk-benchmarks.yml | 30 ++++++++++-------- 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index f0db8d66b..3621630a0 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -17,6 +17,10 @@ inputs: extract-to: description: 'Directory to extract the dataset into' required: true + nvme-mount-path: + description: 'Mount point for the high-speed NVMe SSD' + required: false + default: '/mnt/nvme' runs: using: composite @@ -33,6 +37,14 @@ runs: shell: bash run: rustup show + - name: Mount high-speed NVMe SSD + shell: bash + run: | + sudo mkdir -p -m 777 ${{ inputs.nvme-mount-path }} + sudo lsblk + sudo mkfs.ext4 /dev/nvme0n1 + sudo mount /dev/nvme0n1 ${{ inputs.nvme-mount-path }} + - name: Install system dependencies shell: bash run: | diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index d15ad629d..e2689d609 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -24,6 +24,8 @@ concurrency: env: RUST_BACKTRACE: 1 PERF_INPUTS: diskann-benchmark/perf_test_inputs + NVME_MOUNT_PATH: /mnt/nvme + DISKANN_RUST: /mnt/nvme/diskann_rust defaults: run: @@ -51,41 +53,42 @@ jobs: archive: openai-100K.tar.gz steps: + - name: Setup benchmark environment + uses: ${{ env.DISKANN_RUST }}/.github/actions/setup-disk-benchmark + with: + dataset: ${{ matrix.dataset }} + archive: ${{ matrix.archive }} + extract-to: ${{ env.DISKANN_RUST }}/target/tmp + nvme-mount-path: ${{ env.NVME_MOUNT_PATH }} + - name: Checkout main uses: actions/checkout@v4 with: ref: main - path: diskann_rust + path: ${{ env.DISKANN_RUST }} lfs: true - - name: Setup benchmark environment - uses: ./diskann_rust/.github/actions/setup-disk-benchmark - with: - dataset: ${{ matrix.dataset }} - archive: ${{ matrix.archive }} - extract-to: diskann_rust/target/tmp - # A/A: build once, run twice (identical code — only detecting environment noise) - name: Build benchmark binary - working-directory: diskann_rust + working-directory: ${{ env.DISKANN_RUST }} run: cargo build -p diskann-benchmark --features disk-index --release - name: Run baseline benchmark - working-directory: diskann_rust + working-directory: ${{ env.DISKANN_RUST }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_baseline.json - name: Run target benchmark - working-directory: diskann_rust + working-directory: ${{ env.DISKANN_RUST }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_target.json - name: Validate benchmark results - working-directory: diskann_rust + working-directory: ${{ env.DISKANN_RUST }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ check run \ @@ -100,8 +103,8 @@ jobs: with: name: aa-results-${{ matrix.dataset }} path: | - diskann_rust/target/tmp/${{ matrix.dataset }}_target.json - diskann_rust/target/tmp/${{ matrix.dataset }}_baseline.json + ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.dataset }}_target.json + ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.dataset }}_baseline.json retention-days: 30 # Notify diskann-disk-maintainers on A/A failure — but only when the failure diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index 63e3f7060..098e302fe 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -42,6 +42,9 @@ concurrency: env: RUST_BACKTRACE: 1 PERF_INPUTS: diskann-benchmark/perf_test_inputs + NVME_MOUNT_PATH: /mnt/nvme + DISKANN_RUST: /mnt/nvme/diskann_rust + BASELINE: /mnt/nvme/baseline defaults: run: @@ -77,50 +80,51 @@ jobs: - name: Checkout current branch uses: actions/checkout@v4 with: - path: diskann_rust + path: ${{ env.DISKANN_RUST }} lfs: true - name: Checkout baseline (${{ inputs.baseline_ref || 'main' }}) uses: actions/checkout@v4 with: ref: ${{ inputs.baseline_ref || 'main' }} - path: baseline + path: ${{ env.BASELINE }} lfs: true - name: Setup benchmark environment - uses: ./diskann_rust/.github/actions/setup-disk-benchmark + uses: ${{ env.DISKANN_RUST }}/.github/actions/setup-disk-benchmark with: dataset: ${{ matrix.dataset }} archive: ${{ matrix.archive }} - extract-to: diskann_rust/target/tmp + extract-to: ${{ env.DISKANN_RUST }}/target/tmp + nvme-mount-path: ${{ env.NVME_MOUNT_PATH }} - name: Copy dataset to baseline run: | - mkdir -p baseline/target/tmp - cp -r diskann_rust/target/tmp/${{ matrix.data_dir }} baseline/target/tmp/ + mkdir -p ${{ env.BASELINE }}/target/tmp + cp -r ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.data_dir }} ${{ env.BASELINE }}/target/tmp/ - name: Run baseline benchmark - working-directory: baseline + working-directory: ${{ env.BASELINE }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ - run --input-file ../diskann_rust/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ + run --input-file ../${{ env.DISKANN_RUST }}/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_baseline.json - name: Run current branch benchmark - working-directory: diskann_rust + working-directory: ${{ env.DISKANN_RUST }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_target.json - name: Validate benchmark results - working-directory: diskann_rust + working-directory: ${{ env.DISKANN_RUST }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ check run \ --tolerances ${{ env.PERF_INPUTS }}/disk-index-tolerances.json \ --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ - --before ../baseline/target/tmp/${{ matrix.dataset }}_baseline.json \ + --before ${{ env.BASELINE }}/target/tmp/${{ matrix.dataset }}_baseline.json \ --after target/tmp/${{ matrix.dataset }}_target.json - name: Upload benchmark results @@ -129,6 +133,6 @@ jobs: with: name: benchmark-results-${{ matrix.dataset }} path: | - diskann_rust/target/tmp/${{ matrix.dataset }}_target.json - baseline/target/tmp/${{ matrix.dataset }}_baseline.json + ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.dataset }}_target.json + ${{ env.BASELINE }}/target/tmp/${{ matrix.dataset }}_baseline.json retention-days: 30 \ No newline at end of file From bcb5e4127db55fb0c0af8ccde450cd05bb5fddc1 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 11 May 2026 19:04:35 -0700 Subject: [PATCH 08/14] Checkout action definitions --- .github/workflows/disk-benchmarks-aa.yml | 9 ++++++++- .github/workflows/disk-benchmarks.yml | 25 +++++++++++++++--------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index e2689d609..9ded86fa5 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -53,8 +53,15 @@ jobs: archive: openai-100K.tar.gz steps: + # Shallow checkout to $GITHUB_WORKSPACE so the composite action is accessible via uses:. + - name: Checkout action definitions + uses: actions/checkout@v4 + with: + sparse-checkout: .github + path: github_actions + - name: Setup benchmark environment - uses: ${{ env.DISKANN_RUST }}/.github/actions/setup-disk-benchmark + uses: ./github_actions/.github/actions/setup-disk-benchmark with: dataset: ${{ matrix.dataset }} archive: ${{ matrix.archive }} diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index 098e302fe..f2c5124a0 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -77,6 +77,21 @@ jobs: data_dir: OpenAIArXiv steps: + # Shallow checkout to $GITHUB_WORKSPACE so the composite action is accessible via uses:. + - name: Checkout action definitions + uses: actions/checkout@v4 + with: + sparse-checkout: .github + path: github_actions + + - name: Setup benchmark environment + uses: ./github_actions/.github/actions/setup-disk-benchmark + with: + dataset: ${{ matrix.dataset }} + archive: ${{ matrix.archive }} + extract-to: ${{ env.DISKANN_RUST }}/target/tmp + nvme-mount-path: ${{ env.NVME_MOUNT_PATH }} + - name: Checkout current branch uses: actions/checkout@v4 with: @@ -90,14 +105,6 @@ jobs: path: ${{ env.BASELINE }} lfs: true - - name: Setup benchmark environment - uses: ${{ env.DISKANN_RUST }}/.github/actions/setup-disk-benchmark - with: - dataset: ${{ matrix.dataset }} - archive: ${{ matrix.archive }} - extract-to: ${{ env.DISKANN_RUST }}/target/tmp - nvme-mount-path: ${{ env.NVME_MOUNT_PATH }} - - name: Copy dataset to baseline run: | mkdir -p ${{ env.BASELINE }}/target/tmp @@ -107,7 +114,7 @@ jobs: working-directory: ${{ env.BASELINE }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ - run --input-file ../${{ env.DISKANN_RUST }}/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ + run --input-file ${{ env.DISKANN_RUST }}/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_baseline.json - name: Run current branch benchmark From e7e8b7806e36c1e226e418c60ac645996cfdcf8e Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Tue, 12 May 2026 17:16:01 -0700 Subject: [PATCH 09/14] sudo chmod 777 ${{ inputs.nvme-mount-path }} --- .github/actions/setup-disk-benchmark/action.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index 3621630a0..7ad7d4174 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -40,10 +40,11 @@ runs: - name: Mount high-speed NVMe SSD shell: bash run: | - sudo mkdir -p -m 777 ${{ inputs.nvme-mount-path }} + sudo mkdir -p ${{ inputs.nvme-mount-path }} sudo lsblk sudo mkfs.ext4 /dev/nvme0n1 sudo mount /dev/nvme0n1 ${{ inputs.nvme-mount-path }} + sudo chmod 777 ${{ inputs.nvme-mount-path }} - name: Install system dependencies shell: bash From 8e698125964bc03b991a3a34140c77d17a64b5fc Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Tue, 12 May 2026 17:53:11 -0700 Subject: [PATCH 10/14] Paths are relative to $GITHUB_WORKSPACE --- .github/workflows/disk-benchmarks-aa.yml | 4 ++-- .github/workflows/disk-benchmarks.yml | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index 9ded86fa5..d5669ae11 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -24,8 +24,8 @@ concurrency: env: RUST_BACKTRACE: 1 PERF_INPUTS: diskann-benchmark/perf_test_inputs - NVME_MOUNT_PATH: /mnt/nvme - DISKANN_RUST: /mnt/nvme/diskann_rust + NVME_MOUNT_PATH: nvme + DISKANN_RUST: nvme/diskann_rust defaults: run: diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index f2c5124a0..e93e62b75 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -42,9 +42,9 @@ concurrency: env: RUST_BACKTRACE: 1 PERF_INPUTS: diskann-benchmark/perf_test_inputs - NVME_MOUNT_PATH: /mnt/nvme - DISKANN_RUST: /mnt/nvme/diskann_rust - BASELINE: /mnt/nvme/baseline + NVME_MOUNT_PATH: nvme + DISKANN_RUST: nvme/diskann_rust + BASELINE: nvme/baseline defaults: run: @@ -114,7 +114,7 @@ jobs: working-directory: ${{ env.BASELINE }} run: | cargo run -p diskann-benchmark --features disk-index --release -- \ - run --input-file ${{ env.DISKANN_RUST }}/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ + run --input-file ../../${{ env.DISKANN_RUST }}/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_baseline.json - name: Run current branch benchmark @@ -131,7 +131,7 @@ jobs: check run \ --tolerances ${{ env.PERF_INPUTS }}/disk-index-tolerances.json \ --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ - --before ${{ env.BASELINE }}/target/tmp/${{ matrix.dataset }}_baseline.json \ + --before ../../${{ env.BASELINE }}/target/tmp/${{ matrix.dataset }}_baseline.json \ --after target/tmp/${{ matrix.dataset }}_target.json - name: Upload benchmark results From e8380d4ea3a7a08f435ee8470b2237525c9e6e79 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Tue, 12 May 2026 21:45:38 -0700 Subject: [PATCH 11/14] Revert "Update benchmark workflows to use self-hosted runners for improved performance" This reverts commit e2d7c789fccd440a7f376af4be431478c45d637d. --- .../actions/setup-disk-benchmark/action.yml | 21 -------- .github/workflows/disk-benchmarks-aa.yml | 36 +++++--------- .github/workflows/disk-benchmarks.yml | 49 +++++++------------ 3 files changed, 32 insertions(+), 74 deletions(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index 7ad7d4174..19c6fcbaa 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -17,35 +17,14 @@ inputs: extract-to: description: 'Directory to extract the dataset into' required: true - nvme-mount-path: - description: 'Mount point for the high-speed NVMe SSD' - required: false - default: '/mnt/nvme' runs: using: composite steps: - name: Install Rust - shell: bash - run: | - if ! command -v rustup &>/dev/null; then - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y - echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" - fi - - - name: Show Rust toolchain shell: bash run: rustup show - - name: Mount high-speed NVMe SSD - shell: bash - run: | - sudo mkdir -p ${{ inputs.nvme-mount-path }} - sudo lsblk - sudo mkfs.ext4 /dev/nvme0n1 - sudo mount /dev/nvme0n1 ${{ inputs.nvme-mount-path }} - sudo chmod 777 ${{ inputs.nvme-mount-path }} - - name: Install system dependencies shell: bash run: | diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index d5669ae11..800acbdfb 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -24,8 +24,6 @@ concurrency: env: RUST_BACKTRACE: 1 PERF_INPUTS: diskann-benchmark/perf_test_inputs - NVME_MOUNT_PATH: nvme - DISKANN_RUST: nvme/diskann_rust defaults: run: @@ -39,7 +37,7 @@ jobs: # A/A benchmark: run main vs main to detect environment noise. aa-benchmark: name: A/A - ${{ matrix.dataset }} - runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] + runs-on: ubuntu-latest timeout-minutes: 120 strategy: fail-fast: false @@ -53,49 +51,41 @@ jobs: archive: openai-100K.tar.gz steps: - # Shallow checkout to $GITHUB_WORKSPACE so the composite action is accessible via uses:. - - name: Checkout action definitions + - name: Checkout main uses: actions/checkout@v4 with: - sparse-checkout: .github - path: github_actions + ref: main + path: diskann_rust + lfs: true - name: Setup benchmark environment - uses: ./github_actions/.github/actions/setup-disk-benchmark + uses: ./diskann_rust/.github/actions/setup-disk-benchmark with: dataset: ${{ matrix.dataset }} archive: ${{ matrix.archive }} - extract-to: ${{ env.DISKANN_RUST }}/target/tmp - nvme-mount-path: ${{ env.NVME_MOUNT_PATH }} - - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: main - path: ${{ env.DISKANN_RUST }} - lfs: true + extract-to: diskann_rust/target/tmp # A/A: build once, run twice (identical code — only detecting environment noise) - name: Build benchmark binary - working-directory: ${{ env.DISKANN_RUST }} + working-directory: diskann_rust run: cargo build -p diskann-benchmark --features disk-index --release - name: Run baseline benchmark - working-directory: ${{ env.DISKANN_RUST }} + working-directory: diskann_rust run: | cargo run -p diskann-benchmark --features disk-index --release -- \ run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_baseline.json - name: Run target benchmark - working-directory: ${{ env.DISKANN_RUST }} + working-directory: diskann_rust run: | cargo run -p diskann-benchmark --features disk-index --release -- \ run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_target.json - name: Validate benchmark results - working-directory: ${{ env.DISKANN_RUST }} + working-directory: diskann_rust run: | cargo run -p diskann-benchmark --features disk-index --release -- \ check run \ @@ -110,8 +100,8 @@ jobs: with: name: aa-results-${{ matrix.dataset }} path: | - ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.dataset }}_target.json - ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.dataset }}_baseline.json + diskann_rust/target/tmp/${{ matrix.dataset }}_target.json + diskann_rust/target/tmp/${{ matrix.dataset }}_baseline.json retention-days: 30 # Notify diskann-disk-maintainers on A/A failure — but only when the failure diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index e93e62b75..919e74c15 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -42,9 +42,6 @@ concurrency: env: RUST_BACKTRACE: 1 PERF_INPUTS: diskann-benchmark/perf_test_inputs - NVME_MOUNT_PATH: nvme - DISKANN_RUST: nvme/diskann_rust - BASELINE: nvme/baseline defaults: run: @@ -57,7 +54,7 @@ jobs: # Macro benchmark: compare current branch against baseline macro-benchmark: name: Macro Benchmark - ${{ matrix.dataset }} - runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] + runs-on: ubuntu-latest # TODO: For production benchmarks, consider using a self-hosted runner with: # - NVMe storage for consistent I/O performance # - CPU pinning (taskset) for reduced variance @@ -77,61 +74,53 @@ jobs: data_dir: OpenAIArXiv steps: - # Shallow checkout to $GITHUB_WORKSPACE so the composite action is accessible via uses:. - - name: Checkout action definitions - uses: actions/checkout@v4 - with: - sparse-checkout: .github - path: github_actions - - - name: Setup benchmark environment - uses: ./github_actions/.github/actions/setup-disk-benchmark - with: - dataset: ${{ matrix.dataset }} - archive: ${{ matrix.archive }} - extract-to: ${{ env.DISKANN_RUST }}/target/tmp - nvme-mount-path: ${{ env.NVME_MOUNT_PATH }} - - name: Checkout current branch uses: actions/checkout@v4 with: - path: ${{ env.DISKANN_RUST }} + path: diskann_rust lfs: true - name: Checkout baseline (${{ inputs.baseline_ref || 'main' }}) uses: actions/checkout@v4 with: ref: ${{ inputs.baseline_ref || 'main' }} - path: ${{ env.BASELINE }} + path: baseline lfs: true + - name: Setup benchmark environment + uses: ./diskann_rust/.github/actions/setup-disk-benchmark + with: + dataset: ${{ matrix.dataset }} + archive: ${{ matrix.archive }} + extract-to: diskann_rust/target/tmp + - name: Copy dataset to baseline run: | - mkdir -p ${{ env.BASELINE }}/target/tmp - cp -r ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.data_dir }} ${{ env.BASELINE }}/target/tmp/ + mkdir -p baseline/target/tmp + cp -r diskann_rust/target/tmp/${{ matrix.data_dir }} baseline/target/tmp/ - name: Run baseline benchmark - working-directory: ${{ env.BASELINE }} + working-directory: baseline run: | cargo run -p diskann-benchmark --features disk-index --release -- \ - run --input-file ../../${{ env.DISKANN_RUST }}/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ + run --input-file ../diskann_rust/${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_baseline.json - name: Run current branch benchmark - working-directory: ${{ env.DISKANN_RUST }} + working-directory: diskann_rust run: | cargo run -p diskann-benchmark --features disk-index --release -- \ run --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ --output-file target/tmp/${{ matrix.dataset }}_target.json - name: Validate benchmark results - working-directory: ${{ env.DISKANN_RUST }} + working-directory: diskann_rust run: | cargo run -p diskann-benchmark --features disk-index --release -- \ check run \ --tolerances ${{ env.PERF_INPUTS }}/disk-index-tolerances.json \ --input-file ${{ env.PERF_INPUTS }}/${{ matrix.config }} \ - --before ../../${{ env.BASELINE }}/target/tmp/${{ matrix.dataset }}_baseline.json \ + --before ../baseline/target/tmp/${{ matrix.dataset }}_baseline.json \ --after target/tmp/${{ matrix.dataset }}_target.json - name: Upload benchmark results @@ -140,6 +129,6 @@ jobs: with: name: benchmark-results-${{ matrix.dataset }} path: | - ${{ env.DISKANN_RUST }}/target/tmp/${{ matrix.dataset }}_target.json - ${{ env.BASELINE }}/target/tmp/${{ matrix.dataset }}_baseline.json + diskann_rust/target/tmp/${{ matrix.dataset }}_target.json + baseline/target/tmp/${{ matrix.dataset }}_baseline.json retention-days: 30 \ No newline at end of file From ac9760e39604c7bc0d621c7d2418542deb1f7b37 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Tue, 12 May 2026 21:53:53 -0700 Subject: [PATCH 12/14] Mount to local folders --- .github/actions/setup-disk-benchmark/action.yml | 10 +++++++++- .github/workflows/disk-benchmarks-aa.yml | 12 ++++++++++++ .github/workflows/disk-benchmarks.yml | 12 ++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-disk-benchmark/action.yml b/.github/actions/setup-disk-benchmark/action.yml index 19c6fcbaa..69d7cef74 100644 --- a/.github/actions/setup-disk-benchmark/action.yml +++ b/.github/actions/setup-disk-benchmark/action.yml @@ -23,8 +23,16 @@ runs: steps: - name: Install Rust shell: bash - run: rustup show + run: | + if ! command -v rustup &>/dev/null; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + fi + - name: Show Rust toolchain + shell: bash + run: rustup show + - name: Install system dependencies shell: bash run: | diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index 800acbdfb..164a7ae62 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -51,6 +51,18 @@ jobs: archive: openai-100K.tar.gz steps: + - name: Mount high-speed NVMe SSD + shell: bash + run: | + sudo mkdir -p nvme + sudo lsblk + sudo mkfs.ext4 /dev/nvme0n1 + sudo mount /dev/nvme0n1 nvme + sudo chmod 777 nvme + mkdir -p nvme/diskann_rust nvme/baseline + ln -s nvme/diskann_rust diskann_rust + ln -s nvme/baseline baseline + - name: Checkout main uses: actions/checkout@v4 with: diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index 919e74c15..68c13d062 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -74,6 +74,18 @@ jobs: data_dir: OpenAIArXiv steps: + - name: Mount high-speed NVMe SSD + shell: bash + run: | + sudo mkdir -p nvme + sudo lsblk + sudo mkfs.ext4 /dev/nvme0n1 + sudo mount /dev/nvme0n1 nvme + sudo chmod 777 nvme + mkdir -p nvme/diskann_rust nvme/baseline + ln -s nvme/diskann_rust diskann_rust + ln -s nvme/baseline baseline + - name: Checkout current branch uses: actions/checkout@v4 with: From 504b9dc8474b8ce5102ff90e172906af71d4aaff Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Tue, 12 May 2026 22:38:01 -0700 Subject: [PATCH 13/14] Fixing SSD mount --- .github/workflows/disk-benchmarks-aa.yml | 12 ++++++------ .github/workflows/disk-benchmarks.yml | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index 164a7ae62..5d47de794 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -54,14 +54,14 @@ jobs: - name: Mount high-speed NVMe SSD shell: bash run: | - sudo mkdir -p nvme + sudo mkdir -p /mnt/nvme sudo lsblk sudo mkfs.ext4 /dev/nvme0n1 - sudo mount /dev/nvme0n1 nvme - sudo chmod 777 nvme - mkdir -p nvme/diskann_rust nvme/baseline - ln -s nvme/diskann_rust diskann_rust - ln -s nvme/baseline baseline + sudo mount /dev/nvme0n1 /mnt/nvme + sudo chmod 777 /mnt/nvme + mkdir -p /mnt/nvme/diskann_rust /mnt/nvme/baseline + ln -s /mnt/nvme/diskann_rust diskann_rust + ln -s /mnt/nvme/baseline baseline - name: Checkout main uses: actions/checkout@v4 diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index 68c13d062..5cb1073a7 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -77,14 +77,14 @@ jobs: - name: Mount high-speed NVMe SSD shell: bash run: | - sudo mkdir -p nvme + sudo mkdir -p /mnt/nvme sudo lsblk sudo mkfs.ext4 /dev/nvme0n1 - sudo mount /dev/nvme0n1 nvme - sudo chmod 777 nvme - mkdir -p nvme/diskann_rust nvme/baseline - ln -s nvme/diskann_rust diskann_rust - ln -s nvme/baseline baseline + sudo mount /dev/nvme0n1 /mnt/nvme + sudo chmod 777 /mnt/nvme + mkdir -p /mnt/nvme/diskann_rust /mnt/nvme/baseline + ln -s /mnt/nvme/diskann_rust diskann_rust + ln -s /mnt/nvme/baseline baseline - name: Checkout current branch uses: actions/checkout@v4 From db2965511830f41cf956d51bfe918c0f574e8af0 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Tue, 12 May 2026 22:49:27 -0700 Subject: [PATCH 14/14] Mount SSD --- .github/workflows/disk-benchmarks-aa.yml | 2 +- .github/workflows/disk-benchmarks.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/disk-benchmarks-aa.yml b/.github/workflows/disk-benchmarks-aa.yml index 5d47de794..60e38aa08 100644 --- a/.github/workflows/disk-benchmarks-aa.yml +++ b/.github/workflows/disk-benchmarks-aa.yml @@ -37,7 +37,7 @@ jobs: # A/A benchmark: run main vs main to detect environment noise. aa-benchmark: name: A/A - ${{ matrix.dataset }} - runs-on: ubuntu-latest + runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] timeout-minutes: 120 strategy: fail-fast: false diff --git a/.github/workflows/disk-benchmarks.yml b/.github/workflows/disk-benchmarks.yml index 5cb1073a7..9ef3b4075 100644 --- a/.github/workflows/disk-benchmarks.yml +++ b/.github/workflows/disk-benchmarks.yml @@ -54,7 +54,7 @@ jobs: # Macro benchmark: compare current branch against baseline macro-benchmark: name: Macro Benchmark - ${{ matrix.dataset }} - runs-on: ubuntu-latest + runs-on: [ self-hosted, 1ES.Pool=diskann-github, ubuntu-latest ] # TODO: For production benchmarks, consider using a self-hosted runner with: # - NVMe storage for consistent I/O performance # - CPU pinning (taskset) for reduced variance