
Commit f8f0652

add CPU support for docker compose files (#38)

* Create Dockerfile.cpu
* Update README.md
* Create docker-compose-cpu.yaml
* Rename docker-compose.yml to docker-compose-gpu.yml

Contribution by https://github.com/alexjyong

Parent: a9199a5

4 files changed: 112 additions and 1 deletion


Dockerfile.cpu (new file, 47 additions)

```dockerfile
FROM ubuntu:22.04

# Set non-interactive frontend
ENV DEBIAN_FRONTEND=noninteractive

# Install Python and other dependencies
RUN apt-get update && apt-get install -y \
    python3.10 \
    python3-pip \
    python3-venv \
    libsndfile1 \
    ffmpeg \
    portaudio19-dev \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Create non-root user and set up directories
RUN useradd -m -u 1001 appuser && \
    mkdir -p /app/outputs /app && \
    chown -R appuser:appuser /app

USER appuser
WORKDIR /app

# Copy dependency files
COPY --chown=appuser:appuser requirements.txt ./requirements.txt

# Create and activate virtual environment
RUN python3 -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"

# Install CPU-only PyTorch and other dependencies
RUN pip3 install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu && \
    pip3 install --no-cache-dir -r requirements.txt

# Copy project files
COPY --chown=appuser:appuser . .

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app \
    USE_GPU=false

# Expose the port
EXPOSE 5005

# Run FastAPI server with uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5005", "--workers", "1"]
```
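The `USE_GPU=false` variable baked into this image is an environment hint for the application. As a hedged sketch of how an app might honour such a flag (the `select_device` helper is hypothetical, not taken from the repository):

```python
import os

def select_device() -> str:
    """Pick a torch device string based on the USE_GPU env var.

    Hypothetical helper illustrating the USE_GPU=false flag set in
    Dockerfile.cpu; the real application code may differ.
    """
    if os.environ.get("USE_GPU", "false").lower() != "true":
        return "cpu"
    try:
        import torch  # the CPU-only wheel is installed in this image
        return "cuda" if torch.cuda.is_available() else "cpu"
    except ImportError:
        return "cpu"

os.environ["USE_GPU"] = "false"
print(select_device())  # → cpu
```

Falling back to `"cpu"` on `ImportError` keeps the helper safe even when no torch wheel is present at all.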

README.md (9 additions, 1 deletion)

````diff
@@ -106,6 +106,7 @@ Orpheus-FastAPI/
 ### 🐳 Docker compose
 
 The docker compose file orchestrates the Orpheus-FastAPI for audio and a llama.cpp inference server for the base model token generation. The GGUF model is downloaded with the model-init service.
+There are two versions: `docker-compose-gpu.yml` for machines with GPU support, and `docker-compose-cpu.yaml` for CPU-only machines.
 
 ```bash
 cp .env.example .env # Create your .env file from the example
@@ -119,8 +120,15 @@ ORPHEUS_MODEL_NAME=Orpheus-3b-French-FT-Q8_0.gguf # Example for French
 ```
 
 Then start the services:
+
+For GPU support run:
+```bash
+docker compose -f docker-compose-gpu.yml up
+```
+
+For CPU support run:
 ```bash
-docker compose up --build
+docker compose -f docker-compose-cpu.yaml up
 ```
 
 The system will automatically download the specified model from Hugging Face before starting the service.
````
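Both compose files read their settings from `.env`. A minimal config sketch of the variables the compose files reference (the values shown are illustrative assumptions, not repository defaults):

```bash
# Illustrative .env fragment; adjust values for your hardware.
ORPHEUS_MODEL_NAME=Orpheus-3b-French-FT-Q8_0.gguf  # GGUF fetched by model-init
ORPHEUS_MAX_TOKENS=8192   # passed to llama.cpp as --ctx-size and --n-predict
LLAMA_CPU_THREADS=6       # CPU threads for llama-cpp-server (defaults to 6)
# UID and GID are read by the model-init service so downloaded files are
# owned by your host user; export them from your shell if unset.
```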

docker-compose-cpu.yaml (new file, 56 additions)

```yaml
services:
  orpheus-fastapi:
    container_name: orpheus-fastapi
    build:
      context: .
      dockerfile: Dockerfile.cpu
    ports:
      - "5005:5005"
    env_file:
      - .env
    environment:
      - ORPHEUS_API_URL=http://llama-cpp-server:5006/v1/completions
    restart: unless-stopped
    depends_on:
      llama-cpp-server:
        condition: service_started

  llama-cpp-server:
    image: ghcr.io/ggml-org/llama.cpp:server
    ports:
      - "5006:5006"
    volumes:
      - ./models:/models
    env_file:
      - .env
    depends_on:
      model-init:
        condition: service_completed_successfully
    restart: unless-stopped
    command: >
      -m /models/${ORPHEUS_MODEL_NAME}
      --host 0.0.0.0
      --port 5006
      --ctx-size ${ORPHEUS_MAX_TOKENS}
      --n-predict ${ORPHEUS_MAX_TOKENS}
      --threads ${LLAMA_CPU_THREADS:-6}
      --threads-batch ${LLAMA_CPU_THREADS:-6}
      --rope-scaling linear
      --no-mmap
      --no-slots
      --no-webui

  model-init:
    image: curlimages/curl:latest
    user: ${UID}:${GID}
    volumes:
      - ./models:/app/models
    working_dir: /app
    command: >
      sh -c '
      if [ ! -f /app/models/${ORPHEUS_MODEL_NAME} ]; then
        echo "Downloading model file..."
        wget -P /app/models https://huggingface.co/lex-au/${ORPHEUS_MODEL_NAME}/resolve/main/${ORPHEUS_MODEL_NAME}
      else
        echo "Model file already exists"
      fi'
    restart: "no"
```
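The model-init service downloads the GGUF only when it is not already present in `./models`, so restarts skip the fetch. A minimal Python sketch of that idempotency check (the `ensure_model` helper and `fetch` callback are illustrative stand-ins for the shell script and wget, not repository code):

```python
import os
import tempfile

def ensure_model(models_dir: str, name: str, fetch) -> str:
    """Fetch `name` into `models_dir` only if it is missing.

    Mirrors the file-existence check in the model-init service; `fetch`
    is a hypothetical downloader callback standing in for wget.
    """
    path = os.path.join(models_dir, name)
    if not os.path.exists(path):
        fetch(path)
        return "downloaded"
    return "already exists"

with tempfile.TemporaryDirectory() as models:
    def fake_fetch(path):
        with open(path, "w") as f:
            f.write("gguf")
    print(ensure_model(models, "model.gguf", fake_fetch))  # → downloaded
    print(ensure_model(models, "model.gguf", fake_fetch))  # → already exists
```

The second call returns without invoking the downloader, which is exactly why `restart: "no"` is safe for the init container.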
