forked from dstackai/dstack
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.dstack.yml
More file actions
78 lines (71 loc) · 2.79 KB
/
.dstack.yml
File metadata and controls
78 lines (71 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
type: task
name: open-r1-grpo

# Size of the cluster
nodes: 2

# Quoted: an unquoted version number is parsed as a YAML float
# (e.g. 3.10 would silently become 3.1)
python: "3.12"
nvcc: true

# Required environment variables
env:
  - HF_TOKEN
  - WANDB_API_KEY
  - NCCL_DEBUG=INFO
  # vLLM configuration
  - USE_VLLM=true
  - MODEL=Qwen/Qwen2.5-Coder-7B-Instruct
  # Qwen2.5-Coder-7B-Instruct has 28 attention heads and should be divisible by TP and DP
  - TP=4
  - DP=2

# Commands of the task
commands:
  - uv pip install vllm==0.8.5.post1
  - uv pip install setuptools
  - uv pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp312-cp312-linux_x86_64.whl
  - git clone https://github.com/huggingface/open-r1.git
  - cd open-r1
  - uv pip install .
  - |
    if [ "$USE_VLLM" = "true" ]; then
      # Get the last IP from DSTACK_NODES_IPS for the vLLM node
      VLLM_HOST=$(echo $DSTACK_NODES_IPS | tr ' ' '\n' | tail -n 1)
      if [ "$DSTACK_NODE_RANK" -eq $(($DSTACK_NODES_NUM - 1)) ]; then
        # Last node runs the vLLM server
        trl vllm-serve --model $MODEL --tensor_parallel_size $TP --data_parallel_size $DP --host 0.0.0.0
      else
        # Training node - adjust world size and nodes count for training
        ADJUSTED_NODES_NUM=$(($DSTACK_NODES_NUM - 1))
        ADJUSTED_GPUS_TOTAL=$(($DSTACK_GPUS_PER_NODE * $ADJUSTED_NODES_NUM))
        # Other nodes run training
        accelerate launch --config_file recipes/accelerate_configs/zero3.yaml \
          --num_processes=$ADJUSTED_GPUS_TOTAL \
          --num_machines=$ADJUSTED_NODES_NUM \
          --machine_rank=$DSTACK_NODE_RANK \
          --main_process_ip=$DSTACK_MASTER_NODE_IP \
          --main_process_port=8008 \
          src/open_r1/grpo.py \
          --config recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml \
          --model_name_or_path $MODEL \
          --output_dir /checkpoints/Qwen2.5-Coder-7B-Instruct-GRPO \
          --hub_model_id sjbbihan/Qwen2.5-Coder-7B-Instruct \
          --vllm_server_host=$VLLM_HOST
      fi
    else
      # Standard training mode without VLLM
      echo "Running standard training without VLLM"
      accelerate launch --config_file recipes/accelerate_configs/zero3.yaml \
        --num_processes=$DSTACK_GPUS_NUM \
        --num_machines=$DSTACK_NODES_NUM \
        --machine_rank=$DSTACK_NODE_RANK \
        --main_process_ip=$DSTACK_MASTER_NODE_IP \
        --main_process_port=8008 \
        src/open_r1/grpo.py \
        --config recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml \
        --model_name_or_path $MODEL \
        --output_dir /checkpoints/Qwen2.5-Coder-7B-Instruct-GRPO \
        --hub_model_id sjbbihan/Qwen2.5-Coder-7B-Instruct \
        --use_vllm false
    fi

resources:
  gpu: 80GB:8
  shm_size: 128GB

volumes:
  - /checkpoints:/checkpoints