FootprintAI · hsinatfootprintai · May 19, 2026 · May 19, 2026
diff --git a/Dockerfile.android b/Dockerfile.android
@@ -92,7 +92,9 @@ RUN mkdir -p /out/llama.cpp/android-arm64 /out/llama.cpp/include /out/llama.cpp/
     cp llama-src/include/*.h /out/llama.cpp/include/ && \
     cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
     cp llama-src/common/common.h /out/llama.cpp/common/ && \
-    cp llama-src/common/sampling.h /out/llama.cpp/common/
+    cp llama-src/common/sampling.h /out/llama.cpp/common/ && \
+    cp llama-src/tools/mtmd/mtmd.h /out/llama.cpp/include/ && \
+    cp llama-src/tools/mtmd/mtmd-helper.h /out/llama.cpp/include/
 
 # Collect whisper.cpp artifacts and strip debug symbols
 RUN mkdir -p /out/whisper.cpp/android-arm64 /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \

diff --git a/Dockerfile.libs b/Dockerfile.libs
@@ -53,6 +53,8 @@ RUN WHISPER_VERSION=$(go run ./cmd/versioncmd whisper.cpp) && \
 # ============================================================================
 FROM golang:1.24-bookworm AS builder-cpu
 
+ARG ARCH_SUFFIX=linux-amd64
+
 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential cmake && \
     rm -rf /var/lib/apt/lists/*
@@ -72,18 +74,20 @@ RUN cd whisper-src && \
     cmake --build build --config Release -j$(nproc)
 
 # Collect llama.cpp artifacts and strip debug symbols
-RUN mkdir -p /out/llama.cpp/linux-amd64 /out/llama.cpp/include /out/llama.cpp/ggml/include /out/llama.cpp/common && \
-    find llama-src/build -name "*.a" -exec cp {} /out/llama.cpp/linux-amd64/ \; && \
-    find /out/llama.cpp/linux-amd64 -name "*.a" -exec strip --strip-debug {} \; && \
+RUN mkdir -p /out/llama.cpp/${ARCH_SUFFIX} /out/llama.cpp/include /out/llama.cpp/ggml/include /out/llama.cpp/common && \
+    find llama-src/build -name "*.a" -exec cp {} /out/llama.cpp/${ARCH_SUFFIX}/ \; && \
+    find /out/llama.cpp/${ARCH_SUFFIX} -name "*.a" -exec strip --strip-debug {} \; && \
     cp llama-src/include/*.h /out/llama.cpp/include/ && \
     cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
     cp llama-src/common/common.h /out/llama.cpp/common/ && \
-    cp llama-src/common/sampling.h /out/llama.cpp/common/
+    cp llama-src/common/sampling.h /out/llama.cpp/common/ && \
+    cp llama-src/tools/mtmd/mtmd.h /out/llama.cpp/include/ && \
+    cp llama-src/tools/mtmd/mtmd-helper.h /out/llama.cpp/include/
 
 # Collect whisper.cpp artifacts and strip debug symbols
-RUN mkdir -p /out/whisper.cpp/linux-amd64 /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \
-    find whisper-src/build -name "*.a" -exec cp {} /out/whisper.cpp/linux-amd64/ \; && \
-    find /out/whisper.cpp/linux-amd64 -name "*.a" -exec strip --strip-debug {} \; && \
+RUN mkdir -p /out/whisper.cpp/${ARCH_SUFFIX} /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \
+    find whisper-src/build -name "*.a" -exec cp {} /out/whisper.cpp/${ARCH_SUFFIX}/ \; && \
+    find /out/whisper.cpp/${ARCH_SUFFIX} -name "*.a" -exec strip --strip-debug {} \; && \
     cp whisper-src/include/*.h /out/whisper.cpp/include/ && \
     cp whisper-src/ggml/include/*.h /out/whisper.cpp/ggml/include/
 
@@ -92,9 +96,15 @@ RUN mkdir -p /out/whisper.cpp/linux-amd64 /out/whisper.cpp/include /out/whisper.
 # ============================================================================
 FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 AS builder-cuda
 
+# gcc-12: gcc-13 hits an internal compiler error (ICE) on the fattn-mma-f16 templates
+# (cfgcleanup.cc:580, try_forward_edges); reproducible at any -j level on b9222.
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential cmake wget && \
-    rm -rf /var/lib/apt/lists/*
+    build-essential cmake wget gcc-12 g++-12 && \
+    rm -rf /var/lib/apt/lists/* && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100
+
+ENV CC=gcc-12 CXX=g++-12
 
 WORKDIR /src
 COPY --from=sources /src/llama-src llama-src
@@ -117,7 +127,9 @@ RUN mkdir -p /out/llama.cpp/linux-amd64-cuda /out/llama.cpp/include /out/llama.c
     cp llama-src/include/*.h /out/llama.cpp/include/ && \
     cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
     cp llama-src/common/common.h /out/llama.cpp/common/ && \
-    cp llama-src/common/sampling.h /out/llama.cpp/common/
+    cp llama-src/common/sampling.h /out/llama.cpp/common/ && \
+    cp llama-src/tools/mtmd/mtmd.h /out/llama.cpp/include/ && \
+    cp llama-src/tools/mtmd/mtmd-helper.h /out/llama.cpp/include/
 
 # Collect whisper.cpp artifacts (CUDA variant) and strip debug symbols
 RUN mkdir -p /out/whisper.cpp/linux-amd64-cuda /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \
@@ -133,9 +145,16 @@ RUN mkdir -p /out/whisper.cpp/linux-amd64-cuda /out/whisper.cpp/include /out/whi
 # (llama.cpp b8220+ needs VK_EXT_layer_settings from Vulkan 1.3.261+)
 FROM ubuntu:24.04 AS builder-vulkan
 
+# - spirv-headers: required by llama.cpp b9000+ (ggml-vulkan calls find_package(SPIRV-Headers))
+# - gcc-12: gcc-13 ICEs on common/arg.cpp.o under load
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential cmake wget ca-certificates libvulkan-dev glslang-tools glslc && \
-    rm -rf /var/lib/apt/lists/*
+    build-essential cmake wget ca-certificates libvulkan-dev glslang-tools glslc \
+    spirv-headers gcc-12 g++-12 && \
+    rm -rf /var/lib/apt/lists/* && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \
+    update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100
+
+ENV CC=gcc-12 CXX=g++-12
 
 WORKDIR /src
 COPY --from=sources /src/llama-src llama-src
@@ -158,7 +177,9 @@ RUN mkdir -p /out/llama.cpp/linux-amd64-vulkan /out/llama.cpp/include /out/llama
     cp llama-src/include/*.h /out/llama.cpp/include/ && \
     cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
     cp llama-src/common/common.h /out/llama.cpp/common/ && \
-    cp llama-src/common/sampling.h /out/llama.cpp/common/
+    cp llama-src/common/sampling.h /out/llama.cpp/common/ && \
+    cp llama-src/tools/mtmd/mtmd.h /out/llama.cpp/include/ && \
+    cp llama-src/tools/mtmd/mtmd-helper.h /out/llama.cpp/include/
 
 # Collect whisper.cpp artifacts (Vulkan variant) and strip debug symbols
 RUN mkdir -p /out/whisper.cpp/linux-amd64-vulkan /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \

diff --git a/Dockerfile.libs-arm64 b/Dockerfile.libs-arm64
@@ -0,0 +1,98 @@
+# Dockerfile.libs-arm64 — cross-compile linux-arm64 static libraries
+#
+# Uses aarch64-linux-gnu gcc/g++ toolchain on an amd64 host. No qemu/binfmt
+# required, which makes it work inside unprivileged LXC where binfmt_misc
+# is read-only.
+#
+# Usage:
+#   docker build -f Dockerfile.libs-arm64 -o ./out .
+
+# ============================================================================
+# Stage: Download sources (shared)
+# ============================================================================
+FROM golang:1.24-bookworm AS sources
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src
+
+COPY go.mod ./
+COPY version.go ./
+COPY cmd/versioncmd/ ./cmd/versioncmd/
+
+RUN LLAMA_VERSION=$(go run ./cmd/versioncmd llama.cpp) && \
+    echo "Downloading llama.cpp ${LLAMA_VERSION}..." && \
+    wget -qO llama.cpp.tar.gz "https://github.com/ggerganov/llama.cpp/archive/refs/tags/${LLAMA_VERSION}.tar.gz" && \
+    mkdir -p llama-src && \
+    tar xzf llama.cpp.tar.gz --strip-components=1 -C llama-src && \
+    rm llama.cpp.tar.gz
+
+RUN WHISPER_VERSION=$(go run ./cmd/versioncmd whisper.cpp) && \
+    echo "Downloading whisper.cpp ${WHISPER_VERSION}..." && \
+    wget -qO whisper.cpp.tar.gz "https://github.com/ggerganov/whisper.cpp/archive/refs/tags/${WHISPER_VERSION}.tar.gz" && \
+    mkdir -p whisper-src && \
+    tar xzf whisper.cpp.tar.gz --strip-components=1 -C whisper-src && \
+    rm whisper.cpp.tar.gz
+
+# ============================================================================
+# Builder: cross-compile to aarch64-linux-gnu
+# ============================================================================
+FROM debian:bookworm AS builder
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake \
+    crossbuild-essential-arm64 && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src
+COPY --from=sources /src/llama-src llama-src
+COPY --from=sources /src/whisper-src whisper-src
+
+RUN printf '%s\n' \
+    'set(CMAKE_SYSTEM_NAME Linux)' \
+    'set(CMAKE_SYSTEM_PROCESSOR aarch64)' \
+    'set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)' \
+    'set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)' \
+    'set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)' \
+    'set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)' \
+    'set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)' \
+    > /src/aarch64-toolchain.cmake
+
+# Build llama.cpp (aarch64)
+RUN cd llama-src && \
+    cmake -B build \
+        -DCMAKE_TOOLCHAIN_FILE=/src/aarch64-toolchain.cmake \
+        -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF \
+        -DGGML_NATIVE=OFF && \
+    cmake --build build --config Release -j$(nproc)
+
+# Build whisper.cpp (aarch64)
+RUN cd whisper-src && \
+    cmake -B build \
+        -DCMAKE_TOOLCHAIN_FILE=/src/aarch64-toolchain.cmake \
+        -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF \
+        -DGGML_NATIVE=OFF && \
+    cmake --build build --config Release -j$(nproc)
+
+# Collect llama.cpp artifacts and strip debug symbols
+RUN mkdir -p /out/llama.cpp/linux-arm64 /out/llama.cpp/include /out/llama.cpp/ggml/include /out/llama.cpp/common && \
+    find llama-src/build -name "*.a" -exec cp {} /out/llama.cpp/linux-arm64/ \; && \
+    find /out/llama.cpp/linux-arm64 -name "*.a" -exec aarch64-linux-gnu-strip --strip-debug {} \; && \
+    cp llama-src/include/*.h /out/llama.cpp/include/ && \
+    cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
+    cp llama-src/common/common.h /out/llama.cpp/common/ && \
+    cp llama-src/common/sampling.h /out/llama.cpp/common/ && \
+    cp llama-src/tools/mtmd/mtmd.h /out/llama.cpp/include/ && \
+    cp llama-src/tools/mtmd/mtmd-helper.h /out/llama.cpp/include/
+
+# Collect whisper.cpp artifacts and strip debug symbols
+RUN mkdir -p /out/whisper.cpp/linux-arm64 /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \
+    find whisper-src/build -name "*.a" -exec cp {} /out/whisper.cpp/linux-arm64/ \; && \
+    find /out/whisper.cpp/linux-arm64 -name "*.a" -exec aarch64-linux-gnu-strip --strip-debug {} \; && \
+    cp whisper-src/include/*.h /out/whisper.cpp/include/ && \
+    cp whisper-src/ggml/include/*.h /out/whisper.cpp/ggml/include/
+
+FROM scratch
+COPY --from=builder /out/ /
diff --git a/Makefile b/Makefile
@@ -13,6 +13,7 @@
 #   make build-libs-linux-cpu         # Build linux-amd64 CPU only
 #   make build-libs-linux-cuda        # Build linux-amd64 CUDA only
 #   make build-libs-linux-vulkan      # Build linux-amd64 Vulkan only
+#   make build-libs-linux-arm64       # Build linux-arm64 (CPU) via cross-compilation (no qemu)
 #   make build-libs-android           # Build android-arm64 via NDK
 #   make build-libs-all               # Build native + all linux + android
 #   make clean                        # Remove temp build dirs (keeps prebuilt .a + headers)
@@ -38,7 +39,7 @@ WHISPER_PREBUILT := $(WHISPER_THIRD_PARTY)/prebuilt/$(PLATFORM)
 
 .PHONY: build-libs build-libs-llama build-libs-whisper \
        build-libs-linux build-libs-linux-cpu build-libs-linux-cuda build-libs-linux-vulkan \
-       build-libs-android build-libs-all clean verify
+       build-libs-linux-arm64 build-libs-android build-libs-all clean verify
 
 build-libs: build-libs-llama build-libs-whisper
 
@@ -138,6 +139,29 @@ build-libs-linux-cuda:
 build-libs-linux-vulkan:
 	$(call build-linux-variant,vulkan,-vulkan)
 
+# ============================================================================
+# Docker build for linux-arm64 (cross-compile via aarch64-linux-gnu toolchain)
+# ============================================================================
+# Uses Dockerfile.libs-arm64 which runs aarch64 gcc/g++ inside an amd64
+# container. No qemu/binfmt needed (works in unprivileged LXC).
+build-libs-linux-arm64:
+	@echo "==> Building linux-arm64 static libraries via Docker (cross-compile)..."
+	docker build -f Dockerfile.libs-arm64 -o ./out .
+	@mkdir -p $(LLAMA_THIRD_PARTY)/prebuilt/linux-arm64
+	cp out/llama.cpp/linux-arm64/*.a $(LLAMA_THIRD_PARTY)/prebuilt/linux-arm64/
+	@mkdir -p $(LLAMA_THIRD_PARTY)/include $(LLAMA_THIRD_PARTY)/ggml/include $(LLAMA_THIRD_PARTY)/common
+	cp out/llama.cpp/include/*.h $(LLAMA_THIRD_PARTY)/include/
+	cp out/llama.cpp/ggml/include/*.h $(LLAMA_THIRD_PARTY)/ggml/include/
+	cp out/llama.cpp/common/common.h $(LLAMA_THIRD_PARTY)/common/
+	cp out/llama.cpp/common/sampling.h $(LLAMA_THIRD_PARTY)/common/
+	@mkdir -p $(WHISPER_THIRD_PARTY)/prebuilt/linux-arm64
+	cp out/whisper.cpp/linux-arm64/*.a $(WHISPER_THIRD_PARTY)/prebuilt/linux-arm64/
+	@mkdir -p $(WHISPER_THIRD_PARTY)/include $(WHISPER_THIRD_PARTY)/ggml/include
+	cp out/whisper.cpp/include/*.h $(WHISPER_THIRD_PARTY)/include/
+	cp out/whisper.cpp/ggml/include/*.h $(WHISPER_THIRD_PARTY)/ggml/include/
+	rm -rf out
+	@echo "==> linux-arm64 libraries ready"
+
 # ============================================================================
 # Docker build for android-arm64 (cross-compile via Android NDK)
 # ============================================================================

diff --git a/ggml/llamacpp/llamacpp.go b/ggml/llamacpp/llamacpp.go
@@ -12,7 +12,7 @@ package llamacpp
 #cgo CXXFLAGS: -std=c++17 -I${SRCDIR}/third_party/include -I${SRCDIR}/third_party/ggml/include -I${SRCDIR}/third_party/common
 #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/darwin-arm64
 #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/darwin-amd64
-#cgo darwin LDFLAGS: -lmtmd -lcommon -lllama -lggml-cpu -lggml-base -lggml -lggml-blas -lggml-metal -L/usr/local/opt/libomp/lib -L/opt/homebrew/opt/libomp/lib -lomp -framework Accelerate -framework Metal -framework Foundation -lstdc++ -lm
+#cgo darwin LDFLAGS: -lmtmd -lllama-common -lllama-common-base -lllama -lggml-cpu -lggml-base -lggml -lggml-blas -lggml-metal -L/usr/local/opt/libomp/lib -L/opt/homebrew/opt/libomp/lib -lomp -framework Accelerate -framework Metal -framework Foundation -lstdc++ -lm
 #include <stdlib.h>
 #include <stdbool.h>
 #include "wrapper.h"

diff --git a/ggml/llamacpp/llamacpp_android.go b/ggml/llamacpp/llamacpp_android.go
@@ -7,6 +7,6 @@ package llamacpp
 
 /*
 #cgo android,arm64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/android-arm64
-#cgo android LDFLAGS: -Wl,--start-group -lmtmd -lcommon -lllama -lggml-cpu -lggml-base -lggml -Wl,--end-group -lstdc++ -lm -ldl -llog
+#cgo android LDFLAGS: -Wl,--start-group -lmtmd -lllama-common -lllama-common-base -lllama -lggml-cpu -lggml-base -lggml -Wl,--end-group -lstdc++ -lm -ldl -llog
 */
 import "C"
diff --git a/ggml/llamacpp/llamacpp_linux.go b/ggml/llamacpp/llamacpp_linux.go
@@ -8,6 +8,6 @@ package llamacpp
 /*
 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64
 #cgo linux,arm64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-arm64
-#cgo linux LDFLAGS: -Wl,--start-group -lmtmd -lcommon -lllama -lggml-cpu -lggml-base -lggml -Wl,--end-group -lstdc++ -lm -lpthread -ldl -lrt -lgomp
+#cgo linux LDFLAGS: -Wl,--start-group -lmtmd -lllama-common -lllama-common-base -lllama -lggml-cpu -lggml-base -lggml -Wl,--end-group -lstdc++ -lm -lpthread -ldl -lrt -lgomp
 */
 import "C"
diff --git a/ggml/llamacpp/llamacpp_linux_cuda.go b/ggml/llamacpp/llamacpp_linux_cuda.go
@@ -7,6 +7,6 @@ package llamacpp
 
 /*
 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64-cuda
-#cgo linux LDFLAGS: -Wl,--start-group -lmtmd -lcommon -lllama -lggml-cpu -lggml-base -lggml -lggml-cuda -Wl,--end-group -lcuda -lcudart -lcublas -lcublasLt -lstdc++ -lm -lpthread -ldl -lrt -lgomp
+#cgo linux LDFLAGS: -Wl,--start-group -lmtmd -lllama-common -lllama-common-base -lllama -lggml-cpu -lggml-base -lggml -lggml-cuda -Wl,--end-group -lcuda -lcudart -lcublas -lcublasLt -lstdc++ -lm -lpthread -ldl -lrt -lgomp
 */
 import "C"
diff --git a/ggml/llamacpp/llamacpp_linux_vulkan.go b/ggml/llamacpp/llamacpp_linux_vulkan.go
@@ -7,6 +7,6 @@ package llamacpp
 
 /*
 #cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64-vulkan
-#cgo linux LDFLAGS: -Wl,--start-group -lmtmd -lcommon -lllama -lggml-cpu -lggml-base -lggml -lggml-vulkan -Wl,--end-group -lvulkan -lstdc++ -lm -lpthread -ldl -lrt -lgomp
+#cgo linux LDFLAGS: -Wl,--start-group -lmtmd -lllama-common -lllama-common-base -lllama -lggml-cpu -lggml-base -lggml -lggml-vulkan -Wl,--end-group -lvulkan -lstdc++ -lm -lpthread -ldl -lrt -lgomp
 */
 import "C"