# VM 5702 — Inference: llama.cpp server (CPU-friendly)
# Copy to /opt/ai/inference/ and place model at /opt/ai/inference/data/models/model.gguf
# See: docs/02-architecture/AI_AGENTS_57XX_DEPLOYMENT_PLAN.md Appendix D

services:
  llama:
    # ":server" tag runs the llama.cpp HTTP server entrypoint.
    image: ghcr.io/ggerganov/llama.cpp:server
    container_name: ai-inf-prod
    volumes:
      # Host model directory; model.gguf expected here (see header comment).
      # NOTE(review): consider appending ":ro" — the server only reads the model.
      - /opt/ai/inference/data/models:/models
    # Folded scalar (>): these lines are joined into a single command string.
    # --n-gpu-layers 0 forces CPU-only inference (matches "CPU-friendly" intent).
    command: >
      -m /models/model.gguf
      --host 0.0.0.0 --port 8000
      --n-gpu-layers 0
      --ctx-size 4096
    ports:
      # Quoted to avoid YAML 1.1 sexagesimal parsing of host:container pairs.
      - "8000:8000"
    restart: unless-stopped