feat(docker): add vector search services and GPU configuration

- Add optional Pinecone and sentence-transformers services for vector search
- Configure NVIDIA GPU support with proper environment variables
- Add new environment variables for embeddings and Pinecone
- Add docker compose profiles to optionally enable vector-search
- Improve CUDA configuration for Ollama service
- Add pinecone-net network for service communication
2026-06-23 14:49:31 +00:00 · 2025-10-19 19:56:55 -07:00 · 2025-10-19 19:56:55 -07:00 · 8c1d2ae9f3
commit 8c1d2ae9f3
parent 9dc734eee5
1 changed files with 49 additions and 5 deletions
--- a/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml
+++ b/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml
@ -1,3 +1,4 @@
 services:
  app:
    build:
@ -8,7 +9,14 @@ services:
    # Runtime configuration passed to the app container.
    environment:
      # Graph database connection (arangodb service).
      - ARANGODB_URL=http://arangodb:8529
      - ARANGODB_DB=txt2kg
      # Vector index connection: host and port match the entity-embeddings
      # service, which only runs under the vector-search profile.
      - PINECONE_HOST=entity-embeddings
      - PINECONE_PORT=5081
      - PINECONE_API_KEY=pclocal
      - PINECONE_ENVIRONMENT=local
      - LANGCHAIN_TRACING_V2=true
      # Both URLs below point at the same sentence-transformers endpoint
      # (container port 80); that service is also profile-gated.
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      # NOTE(review): a value of 0 turns off TLS certificate verification in
      # Node.js. Presumably intended for local development only; confirm
      # before using this file outside a trusted environment.
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
@ -23,9 +31,12 @@ services:
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      - arangodb
      - ollama
      # Optional: sentence-transformers and entity-embeddings are only needed for vector search
      # Traditional graph search works without these services
  arangodb:
    image: arangodb:latest
    ports:
@ -59,16 +70,13 @@ services:
    volumes:
      - ollama_data:/root/.ollama
    # GPU and runtime tuning for the Ollama container.
    # NOTE(review): OLLAMA_CUDA, OLLAMA_GPU_LAYERS and OLLAMA_GPU_MEMORY_FRACTION
    # should be checked against the Ollama documentation for the image version
    # in use; unrecognised variables are presumably ignored without warning.
    environment:
      - NVIDIA_VISIBLE_DEVICES=all        # Expose all GPUs to the container runtime
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility  # Required capabilities for CUDA
      - OLLAMA_FLASH_ATTENTION=1          # Enable flash attention for better performance
      - OLLAMA_KEEP_ALIVE=30m             # Keep models loaded for 30 minutes
      - OLLAMA_CUDA=1                     # Enable CUDA acceleration
      - OLLAMA_LLM_LIBRARY=cuda           # Use CUDA library for LLM operations
      - OLLAMA_NUM_PARALLEL=1             # Process one request at a time for 70B models
      - OLLAMA_MAX_LOADED_MODELS=1        # Load only one model at a time to avoid VRAM contention
      - OLLAMA_KV_CACHE_TYPE=q8_0         # Reduce KV cache VRAM usage with minimal performance impact
      - OLLAMA_GPU_LAYERS=999             # Use maximum GPU layers
      - OLLAMA_GPU_MEMORY_FRACTION=0.9    # Use 90% of GPU memory
      # Restricts CUDA to GPU 0 even though NVIDIA_VISIBLE_DEVICES exposes all
      # GPUs. For multi-GPU, use a comma-separated index list (e.g. 0,1) or
      # remove this entry; 'all' is not a valid CUDA_VISIBLE_DEVICES value.
      - CUDA_VISIBLE_DEVICES=0
    networks:
      - default
    restart: unless-stopped
@ -86,6 +94,40 @@ services:
      retries: 3
      start_period: 60s
  # Optional services for vector search (NOT required for traditional graph search)
  # Traditional graph search works with just: app, arangodb, and ollama
  sentence-transformers:
    # Opt-in service: started only when the vector-search profile is enabled,
    # e.g. docker compose --profile vector-search up
    profiles:
      - vector-search
    # Image is built from the local service directory.
    build:
      dockerfile: Dockerfile
      context: ../services/sentence-transformers
    restart: unless-stopped
    networks:
      - default
    ports:
      # Host port 8000 is mapped to container port 80.
      - "8000:80"
    environment:
      - MODEL_NAME=all-MiniLM-L6-v2
  entity-embeddings:
    # Opt-in service: started only when the vector-search profile is enabled,
    # e.g. docker compose --profile vector-search up
    profiles:
      - vector-search
    image: ghcr.io/pinecone-io/pinecone-index:latest
    platform: linux/amd64
    container_name: entity-embeddings
    restart: unless-stopped
    networks:
      - pinecone-net
    ports:
      # Published port equals the PORT value set in the environment below.
      - '5081:5081'
    # Index settings, written in the KEY=value list form used by the other
    # services in this file so every value is passed as a string.
    environment:
      - PORT=5081
      - INDEX_NAME=entity-embeddings
      - INDEX_TYPE=serverless
      - VECTOR_TYPE=dense
      - DIMENSION=384
      - METRIC=cosine
 volumes:
  arangodb_data:
  arangodb_apps_data:
@ -96,3 +138,5 @@ networks:
    driver: bridge
  txt2kg-network:
    driver: bridge
  pinecone-net:
    name: pinecone