# txt2kg Docker Compose - Neo4j + vLLM (GPU-accelerated)
#
# Optional stack optimized for DGX Spark/GB300 with unified memory support
#
# Usage:
#   ./start.sh --vllm                  # Use this compose file
#   ./start.sh --vllm --vector-search  # Add Qdrant + Sentence Transformers

services:
  # Application frontend/API. Talks to Neo4j (graph), vLLM (LLM inference)
  # and, when the vector-search profile is active, Qdrant + embeddings.
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - '3001:3000'
    environment:
      # Neo4j configuration
      - NEO4J_URI=bolt://neo4j:7687
      - NEO4J_USER=neo4j
      # NOTE(review): hard-coded credential committed to VCS — consider ${NEO4J_PASSWORD:-...}
      - NEO4J_PASSWORD=password123
      - GRAPH_DB_TYPE=neo4j
      # Disable ArangoDB (points at localhost so it is unreachable from this container)
      - ARANGODB_URL=http://localhost:8529
      - ARANGODB_DB=txt2kg
      # vLLM configuration (GPU-accelerated)
      - VLLM_BASE_URL=http://vllm:8001/v1
      - VLLM_MODEL=nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8
      # Disable Ollama (same localhost trick as ArangoDB above)
      - OLLAMA_BASE_URL=http://localhost:11434/v1
      - OLLAMA_MODEL=disabled
      # Vector DB configuration (only reachable when --vector-search profile runs)
      - QDRANT_URL=http://qdrant:6333
      - VECTOR_DB_TYPE=qdrant
      # Embeddings configuration
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      # Other settings
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # NOTE(review): disables TLS certificate validation for ALL outbound Node
      # HTTPS calls — confirm this is intentional and not needed in production.
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      # 30-minute client timeouts to accommodate long LLM generations
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - qdrant-net
    depends_on:
      neo4j:
        condition: service_healthy
      vllm:
        # vLLM startup (model load) can take very long; only wait for start,
        # not for its healthcheck, so the app can boot in parallel.
        condition: service_started

  # Neo4j - Graph database
  neo4j:
    image: neo4j:5-community
    ports:
      - '7474:7474'  # HTTP browser UI
      - '7687:7687'  # Bolt protocol (used by app)
    environment:
      # Must match NEO4J_USER/NEO4J_PASSWORD in the app service above.
      - NEO4J_AUTH=neo4j/password123
      - NEO4J_server_memory_heap_initial__size=512m
      - NEO4J_server_memory_heap_max__size=2G
    volumes:
      - neo4j_data:/data
      - neo4j_logs:/logs
    networks:
      - default
    restart: unless-stopped
    healthcheck:
      # HTTP endpoint comes up only once the DB is ready to serve.
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:7474 || exit 1"]
      interval: 15s
      timeout: 10s
      retries: 10
      start_period: 60s

  # vLLM - GPU-accelerated LLM with unified memory support
  vllm:
    build:
      context: ../services/vllm
      dockerfile: Dockerfile
    container_name: vllm-service
    ports:
      - '8001:8001'
    # Host IPC + large shm/memlock are required for NCCL and CUDA pinned memory.
    ipc: host
    ulimits:
      memlock: -1
      stack: 67108864
    shm_size: '16gb'
    environment:
      - VLLM_MODEL=nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8
      - VLLM_TENSOR_PARALLEL_SIZE=1
      - VLLM_MAX_MODEL_LEN=32768
      - VLLM_GPU_MEMORY_UTILIZATION=0.9
      - VLLM_MAX_NUM_SEQS=32
      - VLLM_MAX_NUM_BATCHED_TOKENS=32768
      - VLLM_KV_CACHE_DTYPE=auto
      - VLLM_PORT=8001
      - VLLM_HOST=0.0.0.0
      - CUDA_VISIBLE_DEVICES=0
      - NCCL_DEBUG=INFO
      # Unified-memory support (DGX Spark/GB300): allow CUDA managed allocations
      # and expandable allocator segments so the 49B model fits.
      - CUDA_MANAGED_FORCE_DEVICE_ALLOC=1
      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
      - VLLM_CPU_OFFLOAD_GB=0
    volumes:
      - vllm_models:/app/models
      - /tmp:/tmp
      # Reuse the host HuggingFace cache to avoid re-downloading weights.
      - ~/.cache/huggingface:/root/.cache/huggingface
    networks:
      - default
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 60s
      timeout: 30s
      retries: 30
      # Generous grace period: first boot downloads + loads a 49B model.
      start_period: 1800s

  # Optional: Vector search services (enabled via `--vector-search` profile)
  sentence-transformers:
    build:
      context: ../services/sentence-transformers
      dockerfile: Dockerfile
    ports:
      - '8000:80'
    environment:
      # Must match MODEL_NAME in the app service and the 384-dim collections below.
      - MODEL_NAME=all-MiniLM-L6-v2
    networks:
      - default
    restart: unless-stopped
    profiles:
      - vector-search

  qdrant:
    image: qdrant/qdrant:latest
    container_name: qdrant
    ports:
      - '6333:6333'  # REST
      - '6334:6334'  # gRPC
    volumes:
      - qdrant_data:/qdrant/storage
    networks:
      - qdrant-net
    restart: unless-stopped
    profiles:
      - vector-search

  # One-shot job: create the two 384-dim cosine collections used by the app.
  # `|| true` keeps it idempotent when the collections already exist.
  qdrant-init:
    image: curlimages/curl:latest
    depends_on:
      - qdrant
    # Quoted on purpose: a bare `no` is a YAML 1.1 boolean.
    restart: "no"
    entrypoint: /bin/sh
    command:
      - -c
      - |
        echo 'Waiting for Qdrant to start...'
        sleep 5
        curl -X PUT http://qdrant:6333/collections/entity-embeddings \
          -H 'Content-Type: application/json' \
          -d '{"vectors":{"size":384,"distance":"Cosine"}}' || true
        curl -X PUT http://qdrant:6333/collections/document-embeddings \
          -H 'Content-Type: application/json' \
          -d '{"vectors":{"size":384,"distance":"Cosine"}}' || true
        echo 'Collections created'
    networks:
      - qdrant-net
    profiles:
      - vector-search

volumes:
  neo4j_data:
  neo4j_logs:
  vllm_models:
  qdrant_data:

networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  qdrant-net:
    # Fixed name so external tooling/compose projects can attach to it.
    name: qdrant-network