# Mirror of https://github.com/NVIDIA/dgx-spark-playbooks.git
# Synced: 2026-04-23 02:23:53 +00:00
# Change: Add Neo4j as an alternative graph database backend alongside ArangoDB.
# Includes updates to the frontend components, API routes, and this
# docker-compose configuration for Neo4j integration.
services:
  # Main application container: serves the frontend/API and talks to Neo4j,
  # Qdrant, Ollama, and the sentence-transformers embedding service.
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - '3001:3000'
    environment:
      # Neo4j configuration (ARM64 compatible - works with 64KB pages)
      - NEO4J_URI=bolt://neo4j:7687
      - NEO4J_USER=neo4j
      # NOTE(review): hardcoded credential, must match NEO4J_AUTH in the neo4j
      # service below — consider sourcing from .env for anything non-local.
      - NEO4J_PASSWORD=password123
      - GRAPH_DB_TYPE=neo4j
      # ArangoDB disabled - set to localhost to prevent DNS errors if accidentally accessed
      - ARANGODB_URL=http://localhost:8529
      - ARANGODB_DB=txt2kg
      - QDRANT_URL=http://qdrant:6333
      - VECTOR_DB_TYPE=qdrant
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # NOTE(review): disables TLS certificate verification process-wide in
      # Node.js — confirm this is intended outside local development.
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      neo4j:
        condition: service_healthy
      ollama:
        condition: service_started
    # Optional: sentence-transformers and entity-embeddings are only needed for vector search
    # Traditional graph search works without these services

# Neo4j - ARM64 compatible graph database (works with 64KB page size kernel)
|
|
neo4j:
|
|
image: neo4j:5-community
|
|
ports:
|
|
- '7474:7474' # HTTP
|
|
- '7687:7687' # Bolt
|
|
environment:
|
|
- NEO4J_AUTH=neo4j/password123
|
|
- NEO4J_server_memory_heap_initial__size=512m
|
|
- NEO4J_server_memory_heap_max__size=2G
|
|
volumes:
|
|
- neo4j_data:/data
|
|
- neo4j_logs:/logs
|
|
networks:
|
|
- default
|
|
restart: unless-stopped
|
|
healthcheck:
|
|
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:7474 || exit 1"]
|
|
interval: 15s
|
|
timeout: 10s
|
|
retries: 10
|
|
start_period: 60s
|
|
|
|
# ArangoDB disabled - doesn't support ARM64 with 64KB page size
|
|
# Uncomment if using 4KB page kernel (linux-nvidia-6.14)
|
|
# arangodb:
|
|
# image: arangodb:latest
|
|
# ports:
|
|
# - '8529:8529'
|
|
# environment:
|
|
# - ARANGO_NO_AUTH=1
|
|
# volumes:
|
|
# - arangodb_data:/var/lib/arangodb3
|
|
# - arangodb_apps_data:/var/lib/arangodb3-apps
|
|
ollama:
|
|
build:
|
|
context: ../services/ollama
|
|
dockerfile: Dockerfile
|
|
image: ollama-custom:latest
|
|
container_name: ollama-compose
|
|
ports:
|
|
- '11434:11434'
|
|
volumes:
|
|
- ollama_data:/root/.ollama
|
|
environment:
|
|
- NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible to the container
|
|
- NVIDIA_DRIVER_CAPABILITIES=compute,utility # Required capabilities for CUDA
|
|
- CUDA_VISIBLE_DEVICES=0 # Use first GPU
|
|
- OLLAMA_FLASH_ATTENTION=1 # Enable flash attention for better performance
|
|
- OLLAMA_KEEP_ALIVE=30m # Keep models loaded for 30 minutes
|
|
- OLLAMA_NUM_PARALLEL=4 # Process 4 requests in parallel - DGX Spark has unified memory
|
|
- OLLAMA_MAX_LOADED_MODELS=1 # Load only one model at a time to avoid VRAM contention
|
|
- OLLAMA_KV_CACHE_TYPE=q8_0 # Reduce KV cache VRAM usage with minimal performance impact
|
|
- OLLAMA_GPU_LAYERS=-1 # Force all layers on GPU
|
|
- OLLAMA_LLM_LIBRARY=cuda # Force CUDA backend
|
|
networks:
|
|
- default
|
|
restart: unless-stopped
|
|
deploy:
|
|
resources:
|
|
reservations:
|
|
devices:
|
|
- driver: nvidia
|
|
count: all
|
|
capabilities: [gpu]
|
|
healthcheck:
|
|
test: ["CMD", "ollama", "list"]
|
|
interval: 30s
|
|
timeout: 10s
|
|
retries: 3
|
|
start_period: 60s
|
|
|
|
# Optional services for vector search (NOT required for traditional graph search)
|
|
# Traditional graph search works with just: app, arangodb, and ollama
|
|
sentence-transformers:
|
|
build:
|
|
context: ../services/sentence-transformers
|
|
dockerfile: Dockerfile
|
|
ports:
|
|
- '8000:80'
|
|
environment:
|
|
- MODEL_NAME=all-MiniLM-L6-v2
|
|
networks:
|
|
- default
|
|
restart: unless-stopped
|
|
profiles:
|
|
- vector-search # Only start with: docker compose --profile vector-search up
|
|
qdrant:
|
|
image: qdrant/qdrant:latest
|
|
container_name: qdrant
|
|
ports:
|
|
- "6333:6333"
|
|
- "6334:6334"
|
|
volumes:
|
|
- qdrant_data:/qdrant/storage
|
|
networks:
|
|
- pinecone-net
|
|
restart: unless-stopped
|
|
profiles:
|
|
- vector-search # Only start with: docker compose --profile vector-search up
|
|
qdrant-init:
|
|
image: curlimages/curl:latest
|
|
depends_on:
|
|
- qdrant
|
|
restart: "no"
|
|
entrypoint: /bin/sh
|
|
command:
|
|
- -c
|
|
- |
|
|
echo 'Waiting for Qdrant to start...'
|
|
sleep 5
|
|
echo 'Checking if entity-embeddings collection exists...'
|
|
RESPONSE=$(curl -s http://qdrant:6333/collections/entity-embeddings)
|
|
if echo "$RESPONSE" | grep -q '"status":"ok"'; then
|
|
echo 'entity-embeddings collection already exists'
|
|
else
|
|
echo 'Creating collection entity-embeddings...'
|
|
curl -X PUT http://qdrant:6333/collections/entity-embeddings \
|
|
-H 'Content-Type: application/json' \
|
|
-d '{"vectors":{"size":384,"distance":"Cosine"}}'
|
|
echo ''
|
|
echo 'entity-embeddings collection created successfully'
|
|
fi
|
|
echo 'Checking if document-embeddings collection exists...'
|
|
RESPONSE=$(curl -s http://qdrant:6333/collections/document-embeddings)
|
|
if echo "$RESPONSE" | grep -q '"status":"ok"'; then
|
|
echo 'document-embeddings collection already exists'
|
|
else
|
|
echo 'Creating collection document-embeddings...'
|
|
curl -X PUT http://qdrant:6333/collections/document-embeddings \
|
|
-H 'Content-Type: application/json' \
|
|
-d '{"vectors":{"size":384,"distance":"Cosine"}}'
|
|
echo ''
|
|
echo 'document-embeddings collection created successfully'
|
|
fi
|
|
networks:
|
|
- pinecone-net
|
|
profiles:
|
|
- vector-search
|
|
|
|
# Named volumes keep database and model state across container recreations.
volumes:
  neo4j_data:
  neo4j_logs:
  ollama_data:
  qdrant_data:
  # arangodb_data:
  # arangodb_apps_data:

networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  # 'name:' pins the actual Docker network name to "pinecone" (instead of the
  # default "<project>_pinecone-net"), so other compose projects can attach to it.
  pinecone-net:
    name: pinecone