mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-22 18:13:52 +00:00
LitServe-based prompt injection detection server with a React monitoring dashboard. Serves HuggingFace classification models behind an OpenAI-compatible API with real-time metrics and GPU acceleration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
32 lines
568 B
YAML
32 lines
568 B
YAML
# Docker Compose stack with two services: a GPU-backed `backend` and a `ui`
# front end that is started after it.
services:
  # Backend image is built from the Dockerfile in this directory.
  backend:
    build: .
    ports:
      # host:container — quoted so the mapping is always read as a string.
      - "8234:8234"
    volumes:
      # Named volume (declared at the bottom of this file) mounted over the
      # Hugging Face cache directory inside the container.
      - model-cache:/root/.cache/huggingface
    environment:
      - DEVICE=cuda:0
      # NOTE(review): /app/config.yaml is not mounted here, so it presumably
      # ships inside the image — confirm against the Dockerfile.
      - LITGUARD_CONFIG=/app/config.yaml
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    restart: unless-stopped

  # UI image is built from Dockerfile.ui using the same build context.
  ui:
    build:
      context: .
      dockerfile: Dockerfile.ui
    ports:
      # Host port 3000 is published to container port 80.
      - "3000:80"
    depends_on:
      # Short-form depends_on: controls start order only, not readiness.
      - backend
    restart: unless-stopped

volumes:
  # Named volume with no explicit options, referenced by the backend service.
  model-cache: