forked from dstackai/dstack
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path.dstack.yml
More file actions
78 lines (71 loc) · 2.79 KB
/
.dstack.yml
File metadata and controls
78 lines (71 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
type: task
name: open-r1-grpo

# Size of the cluster
nodes: 2

# Quoted: an unquoted version number is parsed as a YAML float
# (e.g. 3.10 would silently become 3.1)
python: "3.12"
nvcc: true

# Required environment variables
env:
  - HF_TOKEN
  - WANDB_API_KEY
  - NCCL_DEBUG=INFO
  # vLLM configuration
  - USE_VLLM=true
  - MODEL=Qwen/Qwen2.5-Coder-7B-Instruct
  # Qwen2.5-Coder-7B-Instruct has 28 attention heads and should be divisible by TP and DP
  - TP=4
  - DP=2

# Commands of the task
commands:
  - uv pip install vllm==0.8.5.post1
  - uv pip install setuptools
  - uv pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp312-cp312-linux_x86_64.whl
  - git clone https://github.com/huggingface/open-r1.git
  - cd open-r1
  - uv pip install .
  - |
    if [ "$USE_VLLM" = "true" ]; then
      # Get the last IP from DSTACK_NODES_IPS for the vLLM node
      VLLM_HOST=$(echo $DSTACK_NODES_IPS | tr ' ' '\n' | tail -n 1)
      if [ "$DSTACK_NODE_RANK" -eq $(($DSTACK_NODES_NUM - 1)) ]; then
        # Last node runs the vLLM server
        trl vllm-serve --model $MODEL --tensor_parallel_size $TP --data_parallel_size $DP --host 0.0.0.0
      else
        # Training node - adjust world size and nodes count for training
        ADJUSTED_NODES_NUM=$(($DSTACK_NODES_NUM - 1))
        ADJUSTED_GPUS_TOTAL=$(($DSTACK_GPUS_PER_NODE * $ADJUSTED_NODES_NUM))
        # Other nodes run training
        accelerate launch --config_file recipes/accelerate_configs/zero3.yaml \
          --num_processes=$ADJUSTED_GPUS_TOTAL \
          --num_machines=$ADJUSTED_NODES_NUM \
          --machine_rank=$DSTACK_NODE_RANK \
          --main_process_ip=$DSTACK_MASTER_NODE_IP \
          --main_process_port=8008 \
          src/open_r1/grpo.py \
          --config recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml \
          --model_name_or_path $MODEL \
          --output_dir /checkpoints/Qwen2.5-Coder-7B-Instruct-GRPO \
          --hub_model_id sjbbihan/Qwen2.5-Coder-7B-Instruct \
          --vllm_server_host=$VLLM_HOST
      fi
    else
      # Standard training mode without VLLM
      echo "Running standard training without VLLM"
      accelerate launch --config_file recipes/accelerate_configs/zero3.yaml \
        --num_processes=$DSTACK_GPUS_NUM \
        --num_machines=$DSTACK_NODES_NUM \
        --machine_rank=$DSTACK_NODE_RANK \
        --main_process_ip=$DSTACK_MASTER_NODE_IP \
        --main_process_port=8008 \
        src/open_r1/grpo.py \
        --config recipes/Qwen2.5-1.5B-Instruct/grpo/config_demo.yaml \
        --model_name_or_path $MODEL \
        --output_dir /checkpoints/Qwen2.5-Coder-7B-Instruct-GRPO \
        --hub_model_id sjbbihan/Qwen2.5-Coder-7B-Instruct \
        --use_vllm false
    fi

resources:
  gpu: 80GB:8
  shm_size: 128GB

volumes:
  - /checkpoints:/checkpoints