---
# Docker Compose stack for txt2kg.
#
# Always-on services: app, arangodb, arangodb-init, ollama.
# Optional services (profile "vector-search"): sentence-transformers, qdrant,
# qdrant-init. Start them with:
#   docker compose --profile vector-search up

services:
  # Web application, built from the repository root.
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      # host 3001 -> container 3000
      - '3001:3000'
    environment:
      - ARANGODB_URL=http://arangodb:8529
      - ARANGODB_DB=txt2kg
      - QDRANT_URL=http://qdrant:6333
      - VECTOR_DB_TYPE=qdrant
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # NOTE(review): 0 disables TLS certificate verification for every
      # outbound HTTPS request made by Node.js. Confirm this is intended
      # outside local development.
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      # Taken from the host environment; empty string when unset.
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      # qdrant is attached only to pinecone-net, so the app must join it too.
      - pinecone-net
    # Short-form depends_on only orders container start-up; it does not wait
    # for the ollama healthcheck to pass.
    depends_on:
      - arangodb
      - ollama
      # Optional: sentence-transformers and entity-embeddings are only needed
      # for vector search.
      # Traditional graph search works without these services.

  # Graph database.
  arangodb:
    image: arangodb:latest
    ports:
      - '8529:8529'
    environment:
      # NOTE(review): authentication is disabled and port 8529 is published
      # on the host. Confirm this is acceptable for the deployment target.
      - ARANGO_NO_AUTH=1
    volumes:
      - arangodb_data:/var/lib/arangodb3
      - arangodb_apps_data:/var/lib/arangodb3-apps

  # One-shot job that creates the "txt2kg" database once ArangoDB is up.
  # It waits a fixed 10 seconds and relies on `restart: on-failure` to retry
  # if the server is not accepting connections yet.
  arangodb-init:
    image: arangodb:latest
    depends_on:
      arangodb:
        condition: service_started
    restart: on-failure
    # Every line of the folded scalar is kept at the same indentation on
    # purpose: the lines are joined with single spaces into one command line.
    # The \" sequences become plain double quotes inside the script that is
    # handed to `sh -c`.
    entrypoint: >
      sh -c "
      echo 'Waiting for ArangoDB to start...' &&
      sleep 10 &&
      echo 'Creating txt2kg database...' &&
      arangosh
      --server.endpoint tcp://arangodb:8529
      --server.authentication false
      --javascript.execute-string
      'try {
      db._createDatabase(\"txt2kg\");
      console.log(\"Database txt2kg created successfully!\");
      } catch(e) {
      if(e.message.includes(\"duplicate\")) {
      console.log(\"Database txt2kg already exists\");
      } else { throw e; }
      }'
      "

  # Local LLM server, built from a custom image and given all NVIDIA GPUs.
  ollama:
    build:
      context: ../services/ollama
      dockerfile: Dockerfile
    image: ollama-custom:latest
    container_name: ollama-compose
    ports:
      - '11434:11434'
    volumes:
      - ollama_data:/root/.ollama
    environment:
      # Make all GPUs visible to the container
      - NVIDIA_VISIBLE_DEVICES=all
      # Required capabilities for CUDA
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
      # Enable flash attention for better performance
      - OLLAMA_FLASH_ATTENTION=1
      # Keep models loaded for 30 minutes
      - OLLAMA_KEEP_ALIVE=30m
      # Process 4 requests in parallel - DGX Spark has unified memory
      - OLLAMA_NUM_PARALLEL=4
      # Load only one model at a time to avoid VRAM contention
      - OLLAMA_MAX_LOADED_MODELS=1
      # Reduce KV cache VRAM usage with minimal performance impact
      - OLLAMA_KV_CACHE_TYPE=q8_0
    networks:
      - default
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ['CMD', 'ollama', 'list']
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Optional services for vector search (NOT required for traditional graph
  # search).
  # Traditional graph search works with just: app, arangodb, and ollama.
  sentence-transformers:
    build:
      context: ../services/sentence-transformers
      dockerfile: Dockerfile
    ports:
      # host 8000 -> container 80
      - '8000:80'
    environment:
      - MODEL_NAME=all-MiniLM-L6-v2
    networks:
      - default
    restart: unless-stopped
    profiles:
      # Only start with: docker compose --profile vector-search up
      - vector-search

  # Vector database (optional, vector-search profile).
  qdrant:
    image: qdrant/qdrant:latest
    container_name: qdrant
    ports:
      - '6333:6333'
      - '6334:6334'
    volumes:
      - qdrant_data:/qdrant/storage
    networks:
      - pinecone-net
    restart: unless-stopped
    profiles:
      # Only start with: docker compose --profile vector-search up
      - vector-search

  # One-shot job that creates the two Qdrant collections used for embeddings
  # (384-dimensional vectors, cosine distance) when they do not exist yet.
  qdrant-init:
    image: curlimages/curl:latest
    depends_on:
      - qdrant
    # Quoted so YAML reads the string "no" rather than boolean false.
    restart: 'no'
    entrypoint: /bin/sh
    # Every shell `$` is written as `$$`. Compose performs its own variable
    # interpolation on this file, so a bare $RESPONSE would be replaced on the
    # host (normally with an empty string) before the shell ever runs, and the
    # "already exists" check could never match.
    command:
      - '-c'
      - |
        echo 'Waiting for Qdrant to start...'
        sleep 5
        echo 'Checking if entity-embeddings collection exists...'
        RESPONSE=$$(curl -s http://qdrant:6333/collections/entity-embeddings)
        if echo "$$RESPONSE" | grep -q '"status":"ok"'; then
          echo 'entity-embeddings collection already exists'
        else
          echo 'Creating collection entity-embeddings...'
          curl -X PUT http://qdrant:6333/collections/entity-embeddings \
            -H 'Content-Type: application/json' \
            -d '{"vectors":{"size":384,"distance":"Cosine"}}'
          echo ''
          echo 'entity-embeddings collection created successfully'
        fi
        echo 'Checking if document-embeddings collection exists...'
        RESPONSE=$$(curl -s http://qdrant:6333/collections/document-embeddings)
        if echo "$$RESPONSE" | grep -q '"status":"ok"'; then
          echo 'document-embeddings collection already exists'
        else
          echo 'Creating collection document-embeddings...'
          curl -X PUT http://qdrant:6333/collections/document-embeddings \
            -H 'Content-Type: application/json' \
            -d '{"vectors":{"size":384,"distance":"Cosine"}}'
          echo ''
          echo 'document-embeddings collection created successfully'
        fi
    networks:
      - pinecone-net
    profiles:
      - vector-search

# Named volumes holding persistent service data.
volumes:
  arangodb_data:
  arangodb_apps_data:
  ollama_data:
  qdrant_data:

networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  # Created with the fixed name "pinecone" instead of a project-prefixed name.
  pinecone-net:
    name: pinecone