Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions 2.0/problems/vector_db_ann_disk/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Problem configuration for 2.0/problems/vector_db_ann_disk.
tag: systems
# Settings for building and running a submission and its judge.
runtime:
language: rust
# 10800 s = 3 hours.
timeout_seconds: 10800
environment: "Rust project; hidden disk ANN benchmark; Python/NumPy judge"
# NOTE(review): presumably the packages installed for the solution side,
# as opposed to the judge lists below; confirm against the consumer.
apt_packages:
- build-essential
- cargo
- git
- rustc
# Same apt package set that evaluate.sh installs as a fallback when cargo,
# numpy or faiss is missing.
judge_apt_packages:
- build-essential
- cargo
- rustc
- python3-pip
- python3-numpy
# evaluate.sh likewise pip-installs faiss-cpu when `import faiss` fails.
judge_pip_packages:
- faiss-cpu
docker:
image: ubuntu:24.04
# NOTE(review): `environment` appears both above (as a description string) and
# here (as a mapping of resource limits); confirm the two sit at different
# nesting levels so they do not collide as duplicate keys.
environment:
# If these resource limits change, also update the resource budget text in
# readme and harbor/app/README.md so agents can design parallel algorithms
# for the actual CPU and memory budget.
cpus: 8
memory_mb: 8192
storage_mb: 8192
# 3600 s = 1 hour.
build_timeout_seconds: 3600
evaluation:
# The judge drives the search service with this many concurrent workers.
# Keep this aligned with the CPU budget unless the task is intentionally
# changed into a higher-concurrency service benchmark.
# Currently equal to `cpus: 8` above.
query_concurrency: 8
queries_per_worker: 64
# What is collected as the submission: the /app directory.
submission:
kind: directory
path: /app
# NOTE(review): presumably paths left out of the submitted directory
# (build output, VCS metadata, tool state); confirm against the consumer.
exclude:
- target
- .git
- .frontier-cs
37 changes: 37 additions & 0 deletions 2.0/problems/vector_db_ann_disk/evaluate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Launcher for the vector_db_ann_disk evaluator.
#
# With arguments:    forwards them unchanged to evaluator.py and replaces
#                    this shell with it.
# Without arguments: stages the fixed-path solution file into a scratch copy
#                    of harbor/app (as src/db.rs) and runs evaluator.py on
#                    that copy using small default benchmark sizes.
set -euo pipefail

SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Explicit arguments mean the caller drives the evaluator directly.
if (( $# > 0 )); then
  exec python3 "$SCRIPT_DIR/evaluator.py" "$@"
fi

SOLUTION="/work/execution_env/solution_env/solution.rs"
[[ -f "$SOLUTION" ]] || {
  echo "Error: Missing $SOLUTION" >&2
  exit 1
}

# Succeeds only when cargo is on PATH and python3 can import numpy and faiss.
deps_available() {
  command -v cargo >/dev/null 2>&1 &&
    python3 -c 'import numpy, faiss' >/dev/null 2>&1
}

# Install the toolchain on demand when any piece of it is missing.
if ! deps_available; then
  export DEBIAN_FRONTEND=noninteractive
  apt_pkgs=(build-essential cargo rustc python3-pip python3-numpy)
  apt-get update -qq
  apt-get install -y -qq --no-install-recommends "${apt_pkgs[@]}" >/dev/null
  # faiss is pip-installed only if it is still not importable after apt.
  if ! python3 -c 'import faiss' >/dev/null 2>&1; then
    pip3 install --break-system-packages -q faiss-cpu
  fi
fi

# Scratch project tree; removed on exit whether the run succeeds or fails.
WORKDIR=$(mktemp -d)
cleanup() {
  rm -rf "$WORKDIR"
}
trap cleanup EXIT

cp -R "$SCRIPT_DIR/harbor/app/." "$WORKDIR/"
cp "$SOLUTION" "$WORKDIR/src/db.rs"

# The repository validator checks that the evaluator path works; the full
# 100M-vector disk benchmark is exercised through Harbor.
# Each default applies only when the variable is unset or empty.
: "${FRONTIER_VECTOR_DB_N:=5000}"
: "${FRONTIER_VECTOR_DB_Q:=16}"
: "${FRONTIER_VECTOR_DB_WARMUP:=4}"
: "${FRONTIER_VECTOR_DB_CACHE:=/tmp/frontier_vector_db_ann_disk_ci}"
export FRONTIER_VECTOR_DB_N FRONTIER_VECTOR_DB_Q
export FRONTIER_VECTOR_DB_WARMUP FRONTIER_VECTOR_DB_CACHE

# Not exec'd, so the EXIT trap still fires and removes the scratch tree.
python3 "$SCRIPT_DIR/evaluator.py" "$WORKDIR"
Loading