dgx-spark-playbooks/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml
Santosh Bhavani 6105731e70
feat: Add Neo4j support to txt2kg
Add Neo4j as an alternative graph database backend alongside ArangoDB.
This includes updates to the frontend components, API routes, and
docker-compose configuration for Neo4j integration.
2025-12-12 10:58:59 -08:00

202 lines
6.5 KiB
YAML

services:
  # Next.js frontend/API container for txt2kg
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - '3001:3000'
    environment:
      # Neo4j configuration (ARM64 compatible - works with 64KB pages)
      - NEO4J_URI=bolt://neo4j:7687
      - NEO4J_USER=neo4j
      # NOTE(review): hardcoded dev-only credential — must match NEO4J_AUTH in
      # the neo4j service; move to an env file or secret for any shared deployment
      - NEO4J_PASSWORD=password123
      - GRAPH_DB_TYPE=neo4j
      # ArangoDB disabled - set to localhost to prevent DNS errors if accidentally accessed
      - ARANGODB_URL=http://localhost:8529
      - ARANGODB_DB=txt2kg
      - QDRANT_URL=http://qdrant:6333
      - VECTOR_DB_TYPE=qdrant
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # WARNING: disables TLS certificate verification for ALL outbound Node
      # HTTPS requests — acceptable for local development only
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      # Wait for Neo4j's healthcheck before starting the app
      neo4j:
        condition: service_healthy
      ollama:
        condition: service_started
  # Optional: sentence-transformers and entity-embeddings are only needed for vector search
  # Traditional graph search works without these services
# Neo4j - ARM64 compatible graph database (works with 64KB page size kernel)
neo4j:
image: neo4j:5-community
ports:
- '7474:7474' # HTTP
- '7687:7687' # Bolt
environment:
- NEO4J_AUTH=neo4j/password123
- NEO4J_server_memory_heap_initial__size=512m
- NEO4J_server_memory_heap_max__size=2G
volumes:
- neo4j_data:/data
- neo4j_logs:/logs
networks:
- default
restart: unless-stopped
healthcheck:
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:7474 || exit 1"]
interval: 15s
timeout: 10s
retries: 10
start_period: 60s
  # ArangoDB disabled - doesn't support ARM64 with 64KB page size
  # Uncomment if using 4KB page kernel (linux-nvidia-6.14)
  # arangodb:
  #   image: arangodb:latest
  #   ports:
  #     - '8529:8529'
  #   environment:
  #     - ARANGO_NO_AUTH=1
  #   volumes:
  #     - arangodb_data:/var/lib/arangodb3
  #     - arangodb_apps_data:/var/lib/arangodb3-apps
ollama:
build:
context: ../services/ollama
dockerfile: Dockerfile
image: ollama-custom:latest
container_name: ollama-compose
ports:
- '11434:11434'
volumes:
- ollama_data:/root/.ollama
environment:
- NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible to the container
- NVIDIA_DRIVER_CAPABILITIES=compute,utility # Required capabilities for CUDA
- CUDA_VISIBLE_DEVICES=0 # Use first GPU
- OLLAMA_FLASH_ATTENTION=1 # Enable flash attention for better performance
- OLLAMA_KEEP_ALIVE=30m # Keep models loaded for 30 minutes
- OLLAMA_NUM_PARALLEL=4 # Process 4 requests in parallel - DGX Spark has unified memory
- OLLAMA_MAX_LOADED_MODELS=1 # Load only one model at a time to avoid VRAM contention
- OLLAMA_KV_CACHE_TYPE=q8_0 # Reduce KV cache VRAM usage with minimal performance impact
- OLLAMA_GPU_LAYERS=-1 # Force all layers on GPU
- OLLAMA_LLM_LIBRARY=cuda # Force CUDA backend
networks:
- default
restart: unless-stopped
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
healthcheck:
test: ["CMD", "ollama", "list"]
interval: 30s
timeout: 10s
retries: 3
start_period: 60s
# Optional services for vector search (NOT required for traditional graph search)
# Traditional graph search works with just: app, arangodb, and ollama
sentence-transformers:
build:
context: ../services/sentence-transformers
dockerfile: Dockerfile
ports:
- '8000:80'
environment:
- MODEL_NAME=all-MiniLM-L6-v2
networks:
- default
restart: unless-stopped
profiles:
- vector-search # Only start with: docker compose --profile vector-search up
qdrant:
image: qdrant/qdrant:latest
container_name: qdrant
ports:
- "6333:6333"
- "6334:6334"
volumes:
- qdrant_data:/qdrant/storage
networks:
- pinecone-net
restart: unless-stopped
profiles:
- vector-search # Only start with: docker compose --profile vector-search up
qdrant-init:
image: curlimages/curl:latest
depends_on:
- qdrant
restart: "no"
entrypoint: /bin/sh
command:
- -c
- |
echo 'Waiting for Qdrant to start...'
sleep 5
echo 'Checking if entity-embeddings collection exists...'
RESPONSE=$(curl -s http://qdrant:6333/collections/entity-embeddings)
if echo "$RESPONSE" | grep -q '"status":"ok"'; then
echo 'entity-embeddings collection already exists'
else
echo 'Creating collection entity-embeddings...'
curl -X PUT http://qdrant:6333/collections/entity-embeddings \
-H 'Content-Type: application/json' \
-d '{"vectors":{"size":384,"distance":"Cosine"}}'
echo ''
echo 'entity-embeddings collection created successfully'
fi
echo 'Checking if document-embeddings collection exists...'
RESPONSE=$(curl -s http://qdrant:6333/collections/document-embeddings)
if echo "$RESPONSE" | grep -q '"status":"ok"'; then
echo 'document-embeddings collection already exists'
else
echo 'Creating collection document-embeddings...'
curl -X PUT http://qdrant:6333/collections/document-embeddings \
-H 'Content-Type: application/json' \
-d '{"vectors":{"size":384,"distance":"Cosine"}}'
echo ''
echo 'document-embeddings collection created successfully'
fi
networks:
- pinecone-net
profiles:
- vector-search
# Named volumes; keys with no value are valid Compose (default local driver)
volumes:
  neo4j_data:
  neo4j_logs:
  ollama_data:
  qdrant_data:
  # arangodb_data:
  # arangodb_apps_data:
networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  # Fixed external-style name so other compose projects can join "pinecone"
  pinecone-net:
    name: pinecone