# Docker Compose stack for the txt2kg app:
#   required:  app, neo4j, ollama
#   optional:  sentence-transformers, qdrant, qdrant-init (profile: vector-search)
services:
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - '3001:3000'
    environment:
      # Neo4j configuration (ARM64 compatible - works with 64KB pages)
      - NEO4J_URI=bolt://neo4j:7687
      - NEO4J_USER=neo4j
      - NEO4J_PASSWORD=password123
      - GRAPH_DB_TYPE=neo4j
      # ArangoDB disabled - set to localhost to prevent DNS errors if accidentally accessed
      - ARANGODB_URL=http://localhost:8529
      - ARANGODB_DB=txt2kg
      - QDRANT_URL=http://qdrant:6333
      - VECTOR_DB_TYPE=qdrant
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # NOTE(review): disables TLS certificate verification for all Node.js HTTPS
      # traffic in this container — confirm this is intentional outside local dev
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      neo4j:
        condition: service_healthy
      ollama:
        condition: service_started
    # Optional: sentence-transformers and entity-embeddings are only needed for vector search
    # Traditional graph search works without these services

  # Neo4j - ARM64 compatible graph database (works with 64KB page size kernel)
  neo4j:
    image: neo4j:5-community
    ports:
      - '7474:7474'  # HTTP
      - '7687:7687'  # Bolt
    environment:
      - NEO4J_AUTH=neo4j/password123
      - NEO4J_server_memory_heap_initial__size=512m
      - NEO4J_server_memory_heap_max__size=2G
    volumes:
      - neo4j_data:/data
      - neo4j_logs:/logs
    networks:
      - default
    restart: unless-stopped
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:7474 || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 10
      start_period: 60s

  # ArangoDB disabled - doesn't support ARM64 with 64KB page size
  # Uncomment if using 4KB page kernel (linux-nvidia-6.14)
  # arangodb:
  #   image: arangodb:latest
  #   ports:
  #     - '8529:8529'
  #   environment:
  #     - ARANGO_NO_AUTH=1
  #   volumes:
  #     - arangodb_data:/var/lib/arangodb3
  #     - arangodb_apps_data:/var/lib/arangodb3-apps

  ollama:
    build:
      context: ../services/ollama
      dockerfile: Dockerfile
    image: ollama-custom:latest
    container_name: ollama-compose
    ports:
      - '11434:11434'
    volumes:
      - ollama_data:/root/.ollama
    environment:
      - NVIDIA_VISIBLE_DEVICES=all  # Make all GPUs visible to the container
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility  # Required capabilities for CUDA
      - CUDA_VISIBLE_DEVICES=0  # Use first GPU
      - OLLAMA_FLASH_ATTENTION=1  # Enable flash attention for better performance
      - OLLAMA_KEEP_ALIVE=30m  # Keep models loaded for 30 minutes
      - OLLAMA_NUM_PARALLEL=4  # Process 4 requests in parallel - DGX Spark has unified memory
      - OLLAMA_MAX_LOADED_MODELS=1  # Load only one model at a time to avoid VRAM contention
      - OLLAMA_KV_CACHE_TYPE=q8_0  # Reduce KV cache VRAM usage with minimal performance impact
      - OLLAMA_GPU_LAYERS=-1  # Force all layers on GPU
      - OLLAMA_LLM_LIBRARY=cuda  # Force CUDA backend
    networks:
      - default
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Optional services for vector search (NOT required for traditional graph search)
  # Traditional graph search works with just: app, arangodb, and ollama
  sentence-transformers:
    build:
      context: ../services/sentence-transformers
      dockerfile: Dockerfile
    ports:
      - '8000:80'
    environment:
      - MODEL_NAME=all-MiniLM-L6-v2
    networks:
      - default
    restart: unless-stopped
    profiles:
      - vector-search  # Only start with: docker compose --profile vector-search up

  qdrant:
    image: qdrant/qdrant:latest
    container_name: qdrant
    ports:
      - "6333:6333"
      - "6334:6334"
    volumes:
      - qdrant_data:/qdrant/storage
    networks:
      - pinecone-net
    restart: unless-stopped
    profiles:
      - vector-search  # Only start with: docker compose --profile vector-search up

  # One-shot init container: creates the two Qdrant collections if missing, then exits
  qdrant-init:
    image: curlimages/curl:latest
    depends_on:
      - qdrant
    # quoted: a bare `no` would be parsed as boolean false by YAML
    restart: "no"
    entrypoint: /bin/sh
    command:
      - "-c"
      - |
        echo 'Waiting for Qdrant to start...'
        sleep 5
        echo 'Checking if entity-embeddings collection exists...'
        RESPONSE=$(curl -s http://qdrant:6333/collections/entity-embeddings)
        if echo "$RESPONSE" | grep -q '"status":"ok"'; then
          echo 'entity-embeddings collection already exists'
        else
          echo 'Creating collection entity-embeddings...'
          curl -X PUT http://qdrant:6333/collections/entity-embeddings \
            -H 'Content-Type: application/json' \
            -d '{"vectors":{"size":384,"distance":"Cosine"}}'
          echo ''
          echo 'entity-embeddings collection created successfully'
        fi
        echo 'Checking if document-embeddings collection exists...'
        RESPONSE=$(curl -s http://qdrant:6333/collections/document-embeddings)
        if echo "$RESPONSE" | grep -q '"status":"ok"'; then
          echo 'document-embeddings collection already exists'
        else
          echo 'Creating collection document-embeddings...'
          curl -X PUT http://qdrant:6333/collections/document-embeddings \
            -H 'Content-Type: application/json' \
            -d '{"vectors":{"size":384,"distance":"Cosine"}}'
          echo ''
          echo 'document-embeddings collection created successfully'
        fi
    networks:
      - pinecone-net
    profiles:
      - vector-search

volumes:
  neo4j_data:
  neo4j_logs:
  ollama_data:
  qdrant_data:
  # arangodb_data:
  # arangodb_apps_data:

networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  pinecone-net:
    name: pinecone