# docker-compose.yml
# Compose stack: two locally built services that each reserve one NVIDIA GPU
# (hf-serve, vllm-serve), plus Prometheus and Grafana containers.

# Kept for older docker-compose releases; Compose v2 treats the top-level
# `version` key as obsolete and ignores it.
version: "3.8"

services:
  # Built from ./Dockerfile; container port 8000 is published on host port 8000.
  hf-serve:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    environment:
      # NOTE(review): `all` is set here while the reservation above asks for a
      # single GPU -- confirm which of the two is intended.
      - NVIDIA_VISIBLE_DEVICES=all
    # NOTE(review): unlike vllm-serve, no Hugging Face cache volume is mounted
    # here -- confirm that is intended.

  # Built from ./Dockerfile.vllm; container port 8001 is published on host
  # port 8001.
  vllm-serve:
    build:
      context: .
      dockerfile: Dockerfile.vllm
    ports:
      - "8001:8001"
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU for this container.
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    environment:
      # Presumably read by the entrypoint defined in Dockerfile.vllm -- verify
      # there how each variable is consumed.
      - MODEL_NAME_OR_PATH=Qwen/Qwen2.5-7B-Instruct
      - MAX_MODEL_LEN=2048
      - GPU_MEMORY_UTILIZATION=0.90
      # NOTE(review): same `all` vs. `count: 1` question as on hf-serve.
      - NVIDIA_VISIBLE_DEVICES=all
    volumes:
      # Bind-mount the host directory named by HF_HOME (default
      # ~/.cache/huggingface) at /root/.cache/huggingface in the container.
      - "${HF_HOME:-~/.cache/huggingface}:/root/.cache/huggingface"

  prometheus:
    # NOTE(review): `latest` is a floating tag; pin a version if reproducible
    # deployments are required.
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Prometheus configuration, mounted read-only.
      - "./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro"
    # Short-form depends_on: controls start order only; no health condition
    # is configured.
    depends_on:
      - hf-serve
      - vllm-serve

  grafana:
    # NOTE(review): `latest` is a floating tag; pin a version if reproducible
    # deployments are required.
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Defaults to `admin` (the previous hard-coded value); set
      # GF_SECURITY_ADMIN_PASSWORD in the shell or an .env file to override it.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
    volumes:
      # Dashboard JSON, mounted read-only.
      # NOTE(review): no dashboard or datasource provisioning config is
      # mounted in this file -- confirm Grafana is configured to load
      # dashboards from this directory.
      - "./monitoring/grafana-dashboard.json:/var/lib/grafana/dashboards/minillm.json:ro"
    depends_on:
      - prometheus