# Mirror of https://github.com/NVIDIA/dgx-spark-playbooks.git
# Synced 2026-04-23 02:23:53 +00:00
services:
  # Main txt2kg application: talks to ArangoDB (graph), Qdrant (vectors),
  # Ollama (LLM) and the sentence-transformers embedding service.
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - '3001:3000'
    environment:
      - ARANGODB_URL=http://arangodb:8529
      - ARANGODB_DB=txt2kg
      - QDRANT_URL=http://qdrant:6333
      - VECTOR_DB_TYPE=qdrant
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # NOTE(review): disables TLS certificate verification for ALL outbound
      # Node.js HTTPS traffic — acceptable for local development only; never
      # ship this to production.
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      # Empty-string default so compose does not warn when the var is unset.
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      - arangodb
      - ollama
    # Optional: sentence-transformers and entity-embeddings are only needed for vector search
    # Traditional graph search works without these services
|
arangodb:
|
|
image: arangodb:latest
|
|
ports:
|
|
- '8529:8529'
|
|
environment:
|
|
- ARANGO_NO_AUTH=1
|
|
volumes:
|
|
- arangodb_data:/var/lib/arangodb3
|
|
- arangodb_apps_data:/var/lib/arangodb3-apps
|
|
arangodb-init:
|
|
image: arangodb:latest
|
|
depends_on:
|
|
arangodb:
|
|
condition: service_started
|
|
restart: on-failure
|
|
entrypoint: >
|
|
sh -c "
|
|
echo 'Waiting for ArangoDB to start...' &&
|
|
sleep 10 &&
|
|
echo 'Creating txt2kg database...' &&
|
|
arangosh --server.endpoint tcp://arangodb:8529 --server.authentication false --javascript.execute-string 'try { db._createDatabase(\"txt2kg\"); console.log(\"Database txt2kg created successfully!\"); } catch(e) { if(e.message.includes(\"duplicate\")) { console.log(\"Database txt2kg already exists\"); } else { throw e; } }'
|
|
"
|
|
ollama:
|
|
build:
|
|
context: ../services/ollama
|
|
dockerfile: Dockerfile
|
|
image: ollama-custom:latest
|
|
container_name: ollama-compose
|
|
ports:
|
|
- '11434:11434'
|
|
volumes:
|
|
- ollama_data:/root/.ollama
|
|
environment:
|
|
- NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible to the container
|
|
- NVIDIA_DRIVER_CAPABILITIES=compute,utility # Required capabilities for CUDA
|
|
- OLLAMA_FLASH_ATTENTION=1 # Enable flash attention for better performance
|
|
- OLLAMA_KEEP_ALIVE=30m # Keep models loaded for 30 minutes
|
|
- OLLAMA_NUM_PARALLEL=4 # Process 4 requests in parallel - DGX Spark has unified memory
|
|
- OLLAMA_MAX_LOADED_MODELS=1 # Load only one model at a time to avoid VRAM contention
|
|
- OLLAMA_KV_CACHE_TYPE=q8_0 # Reduce KV cache VRAM usage with minimal performance impact
|
|
networks:
|
|
- default
|
|
restart: unless-stopped
|
|
deploy:
|
|
resources:
|
|
reservations:
|
|
devices:
|
|
- driver: nvidia
|
|
count: all
|
|
capabilities: [gpu]
|
|
healthcheck:
|
|
test: ["CMD", "ollama", "list"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
start_period: 60s
|
|
|
|
# Optional services for vector search (NOT required for traditional graph search)
|
|
# Traditional graph search works with just: app, arangodb, and ollama
|
|
sentence-transformers:
|
|
build:
|
|
context: ../services/sentence-transformers
|
|
dockerfile: Dockerfile
|
|
ports:
|
|
- '8000:80'
|
|
environment:
|
|
- MODEL_NAME=all-MiniLM-L6-v2
|
|
networks:
|
|
- default
|
|
restart: unless-stopped
|
|
profiles:
|
|
- vector-search # Only start with: docker compose --profile vector-search up
|
|
qdrant:
|
|
image: qdrant/qdrant:latest
|
|
container_name: qdrant
|
|
ports:
|
|
- "6333:6333"
|
|
- "6334:6334"
|
|
volumes:
|
|
- qdrant_data:/qdrant/storage
|
|
networks:
|
|
- pinecone-net
|
|
restart: unless-stopped
|
|
profiles:
|
|
- vector-search # Only start with: docker compose --profile vector-search up
|
|
qdrant-init:
|
|
image: curlimages/curl:latest
|
|
depends_on:
|
|
- qdrant
|
|
restart: "no"
|
|
entrypoint: /bin/sh
|
|
command:
|
|
- -c
|
|
- |
|
|
echo 'Waiting for Qdrant to start...'
|
|
sleep 5
|
|
echo 'Checking if entity-embeddings collection exists...'
|
|
RESPONSE=$(curl -s http://qdrant:6333/collections/entity-embeddings)
|
|
if echo "$RESPONSE" | grep -q '"status":"ok"'; then
|
|
echo 'entity-embeddings collection already exists'
|
|
else
|
|
echo 'Creating collection entity-embeddings...'
|
|
curl -X PUT http://qdrant:6333/collections/entity-embeddings \
|
|
-H 'Content-Type: application/json' \
|
|
-d '{"vectors":{"size":384,"distance":"Cosine"}}'
|
|
echo ''
|
|
echo 'entity-embeddings collection created successfully'
|
|
fi
|
|
echo 'Checking if document-embeddings collection exists...'
|
|
RESPONSE=$(curl -s http://qdrant:6333/collections/document-embeddings)
|
|
if echo "$RESPONSE" | grep -q '"status":"ok"'; then
|
|
echo 'document-embeddings collection already exists'
|
|
else
|
|
echo 'Creating collection document-embeddings...'
|
|
curl -X PUT http://qdrant:6333/collections/document-embeddings \
|
|
-H 'Content-Type: application/json' \
|
|
-d '{"vectors":{"size":384,"distance":"Cosine"}}'
|
|
echo ''
|
|
echo 'document-embeddings collection created successfully'
|
|
fi
|
|
networks:
|
|
- pinecone-net
|
|
profiles:
|
|
- vector-search
|
|
# Named volumes — persist database and model data across container restarts.
volumes:
  arangodb_data:
  arangodb_apps_data:
  ollama_data:
  qdrant_data:
networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  pinecone-net:
    # Explicit name overrides the default '<project>_pinecone-net' naming.
    name: pinecone