---
# Compose stack for txt2kg: app + ArangoDB + Ollama are the required core.
# sentence-transformers and entity-embeddings are optional (vector-search profile).
services:
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - "3001:3000"
    environment:
      - ARANGODB_URL=http://arangodb:8529
      - ARANGODB_DB=txt2kg
      - PINECONE_HOST=entity-embeddings
      - PINECONE_PORT=5081
      - PINECONE_API_KEY=pclocal
      - PINECONE_ENVIRONMENT=local
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      - arangodb
      - ollama
      # Optional: sentence-transformers and entity-embeddings are only needed for vector search
      # Traditional graph search works without these services

  arangodb:
    image: arangodb:latest
    ports:
      - "8529:8529"
    environment:
      - ARANGO_NO_AUTH=1
    volumes:
      - arangodb_data:/var/lib/arangodb3
      - arangodb_apps_data:/var/lib/arangodb3-apps

  # One-shot init job: retries until ArangoDB accepts connections, then
  # creates the txt2kg database (idempotent — tolerates "duplicate").
  arangodb-init:
    image: arangodb:latest
    depends_on:
      arangodb:
        condition: service_started
    restart: on-failure
    # Folded scalar (>) joins these lines with spaces into a single shell command;
    # block scalars do no backslash processing, so \" reaches the shell verbatim.
    entrypoint: >
      sh -c "
      echo 'Waiting for ArangoDB to start...' &&
      sleep 10 &&
      echo 'Creating txt2kg database...' &&
      arangosh --server.endpoint tcp://arangodb:8529
      --server.authentication false
      --javascript.execute-string
      'try { db._createDatabase(\"txt2kg\"); console.log(\"Database txt2kg created successfully!\"); } catch(e) { if(e.message.includes(\"duplicate\")) { console.log(\"Database txt2kg already exists\"); } else { throw e; } }'
      "

  ollama:
    build:
      context: ../services/ollama
      dockerfile: Dockerfile
    image: ollama-custom:latest
    container_name: ollama-compose
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    environment:
      - NVIDIA_VISIBLE_DEVICES=all  # Make all GPUs visible to the container
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility  # Required capabilities for CUDA
      - OLLAMA_FLASH_ATTENTION=1  # Enable flash attention for better performance
      - OLLAMA_KEEP_ALIVE=30m  # Keep models loaded for 30 minutes
      - OLLAMA_NUM_PARALLEL=1  # Process one request at a time for 70B models
      - OLLAMA_MAX_LOADED_MODELS=1  # Load only one model at a time to avoid VRAM contention
      - OLLAMA_KV_CACHE_TYPE=q8_0  # Reduce KV cache VRAM usage with minimal performance impact
    networks:
      - default
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Optional services for vector search (NOT required for traditional graph search)
  # Traditional graph search works with just: app, arangodb, and ollama
  sentence-transformers:
    build:
      context: ../services/sentence-transformers
      dockerfile: Dockerfile
    ports:
      - "8000:80"
    environment:
      - MODEL_NAME=all-MiniLM-L6-v2
    networks:
      - default
    restart: unless-stopped
    profiles:
      - vector-search  # Only start with: docker compose --profile vector-search up

  entity-embeddings:
    image: ghcr.io/pinecone-io/pinecone-index:latest
    container_name: entity-embeddings
    environment:
      # Values quoted as strings: Compose environment values are strings, and
      # quoting avoids YAML integer coercion in other tooling.
      PORT: "5081"
      INDEX_TYPE: serverless
      VECTOR_TYPE: dense
      DIMENSION: "384"
      METRIC: cosine
      INDEX_NAME: entity-embeddings
    ports:
      - "5081:5081"
    platform: linux/amd64
    networks:
      - pinecone-net
    restart: unless-stopped
    profiles:
      - vector-search  # Only start with: docker compose --profile vector-search up

volumes:
  arangodb_data:
  arangodb_apps_data:
  ollama_data:

networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  pinecone-net:
    name: pinecone