diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..a00166e
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,85 @@
+name: Release
+
+# Cuts a GitHub release when a v* tag is pushed. The release artifact is the
+# xcodebuild Release product: the mlx-server executable plus its colocated
+# resource bundles (mlx-swift_Cmlx.bundle carries the Metal default.metallib).
+# A lone binary fails at runtime with "Failed to load the default metallib",
+# so the tarball must keep the bundles next to the executable.
+
+on:
+  push:
+    tags:
+      - "v*"
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    runs-on: macos-15
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Select Xcode
+        uses: maxim-lobanov/setup-xcode@v1
+        with:
+          xcode-version: latest-stable
+
+      # Tag v1.2.3 -> version 1.2.3 (used in the artifact name).
+      - name: Derive version
+        id: version
+        run: echo "version=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
+
+      # mlx-swift's Metal shaders + the MLX C++ sources are expensive to
+      # compile; cache the SwiftPM and Xcode build products across runs.
+      # Package.resolved is hashed too, so bumping a pinned dependency yields a
+      # new key and the refreshed build products are saved.
+      - name: Cache build
+        uses: actions/cache@v4
+        with:
+          path: |
+            .build
+            ~/Library/Caches/org.swift.swiftpm
+          key: ${{ runner.os }}-release-${{ hashFiles('Package.swift', 'Package.resolved') }}
+          restore-keys: ${{ runner.os }}-release-
+
+      # xcodebuild (not `swift build`) compiles the Metal shaders and copies
+      # the resource bundles next to the binary. -skipMacroValidation is
+      # required for the MLXHuggingFace macro plugin in non-interactive runs.
+      - name: Build (xcodebuild, Release, compiles Metal shaders)
+        run: |
+          xcodebuild -scheme mlx-server \
+            -destination 'platform=macOS' \
+            -configuration Release \
+            -derivedDataPath .build/xcode \
+            -skipMacroValidation \
+            build
+
+      # Step outputs reach the script through env, not inline expression
+      # expansion: VERSION comes from the tag name, and expanding it inside the
+      # script text would let a crafted tag inject shell commands.
+      - name: Package artifact
+        id: package
+        env:
+          VERSION: ${{ steps.version.outputs.version }}
+        run: |
+          set -euo pipefail
+          PRODUCTS=.build/xcode/Build/Products/Release
+          STAGE="mlx-server-${VERSION}-macos-arm64"
+          mkdir -p "dist/${STAGE}"
+          cp "${PRODUCTS}/mlx-server" "dist/${STAGE}/"
+          cp -R "${PRODUCTS}"/*.bundle "dist/${STAGE}/"
+          tar -C dist -czf "dist/${STAGE}.tar.gz" "${STAGE}"
+          ( cd dist && shasum -a 256 "${STAGE}.tar.gz" | tee "${STAGE}.tar.gz.sha256" )
+          echo "tarball=dist/${STAGE}.tar.gz" >> "$GITHUB_OUTPUT"
+
+      - name: Create release
+        env:
+          GH_TOKEN: ${{ github.token }}
+          TARBALL: ${{ steps.package.outputs.tarball }}
+        run: |
+          gh release create "${GITHUB_REF_NAME}" \
+            --title "${GITHUB_REF_NAME}" \
+            --generate-notes \
+            "${TARBALL}" \
+            "${TARBALL}.sha256"
diff --git a/README.md b/README.md
index 4a2e972..9fbc30c 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ The end goal is to be a drop-in replacement for `llama-server` in [LLMKube](http
 - Tool calling and structured outputs
 - Vision-language model support (Qwen-VL, Gemma 4 VL, etc.)
 - Multi-slot concurrency with longest-prefix KV cache reuse
-- Single static binary distribution via Homebrew + GitHub releases
+- One-command install via Homebrew, plus prebuilt GitHub releases
 
 ## Non-goals
 
diff --git a/Sources/MLXServer/MLXServerCommand.swift b/Sources/MLXServer/MLXServerCommand.swift
index 47cde87..7f11208 100644
--- a/Sources/MLXServer/MLXServerCommand.swift
+++ b/Sources/MLXServer/MLXServerCommand.swift
@@ -5,7 +5,8 @@ import MLXServerKit
 struct MLXServerCommand: AsyncParsableCommand {
     static let configuration = CommandConfiguration(
         commandName: "mlx-server",
-        abstract: "OpenAI-compatible HTTP server for mlx-swift-lm on Apple Silicon."
+        abstract: "OpenAI-compatible HTTP server for mlx-swift-lm on Apple Silicon.",
+        version: "0.1.0"
     )
 
     @Option(name: .long, help: "Model identifier (HuggingFace ID or local directory path).")