feat(docker): add vector search services and GPU configuration

- Add optional Pinecone and sentence-transformers services for vector search
- Configure NVIDIA GPU support with proper environment variables
- Add new environment variables for embeddings and Pinecone
- Add docker compose profiles to optionally enable vector-search
- Improve CUDA configuration for Ollama service
- Add pinecone-net network for service communication
This commit is contained in:
Santosh Bhavani 2025-10-19 19:56:55 -07:00
parent 9dc734eee5
commit 8c1d2ae9f3

View File

@@ -1,3 +1,4 @@
services:
app:
build:
@@ -8,7 +9,14 @@ services:
environment:
- ARANGODB_URL=http://arangodb:8529
- ARANGODB_DB=txt2kg
- PINECONE_HOST=entity-embeddings
- PINECONE_PORT=5081
- PINECONE_API_KEY=pclocal
- PINECONE_ENVIRONMENT=local
- LANGCHAIN_TRACING_V2=true
- SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
- MODEL_NAME=all-MiniLM-L6-v2
- EMBEDDINGS_API_URL=http://sentence-transformers:80
- GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
- NODE_TLS_REJECT_UNAUTHORIZED=0
- OLLAMA_BASE_URL=http://ollama:11434/v1
@@ -23,9 +31,12 @@ services:
networks:
- default
- txt2kg-network
- pinecone-net
depends_on:
- arangodb
- ollama
# Optional: sentence-transformers and entity-embeddings are only needed for vector search
# Traditional graph search works without these services
arangodb:
image: arangodb:latest
ports:
@@ -59,16 +70,13 @@ services:
volumes:
- ollama_data:/root/.ollama
environment:
- NVIDIA_VISIBLE_DEVICES=all # Make all GPUs visible to the container
- NVIDIA_DRIVER_CAPABILITIES=compute,utility # Required capabilities for CUDA
- OLLAMA_FLASH_ATTENTION=1 # Enable flash attention for better performance
- OLLAMA_KEEP_ALIVE=30m # Keep models loaded for 30 minutes
- OLLAMA_CUDA=1 # Enable CUDA acceleration
- OLLAMA_LLM_LIBRARY=cuda # Use CUDA library for LLM operations
- OLLAMA_NUM_PARALLEL=1 # Process one request at a time for 70B models
- OLLAMA_MAX_LOADED_MODELS=1 # Load only one model at a time to avoid VRAM contention
- OLLAMA_KV_CACHE_TYPE=q8_0 # Reduce KV cache VRAM usage with minimal performance impact
- OLLAMA_GPU_LAYERS=999 # Use maximum GPU layers
- OLLAMA_GPU_MEMORY_FRACTION=0.9 # Use 90% of GPU memory
- CUDA_VISIBLE_DEVICES=0 # Use GPU 0 (change to 'all' for multi-GPU)
networks:
- default
restart: unless-stopped
@@ -85,6 +93,40 @@ services:
timeout: 10s
retries: 3
start_period: 60s
# Optional services for vector search (NOT required for traditional graph search)
# Traditional graph search works with just: app, arangodb, and ollama
sentence-transformers:
build:
context: ../services/sentence-transformers
dockerfile: Dockerfile
ports:
- '8000:80'
environment:
- MODEL_NAME=all-MiniLM-L6-v2
networks:
- default
restart: unless-stopped
profiles:
- vector-search # Only start with: docker compose --profile vector-search up
entity-embeddings:
image: ghcr.io/pinecone-io/pinecone-index:latest
container_name: entity-embeddings
environment:
PORT: 5081
INDEX_TYPE: serverless
VECTOR_TYPE: dense
DIMENSION: 384
METRIC: cosine
INDEX_NAME: entity-embeddings
ports:
- "5081:5081"
platform: linux/amd64
networks:
- pinecone-net
restart: unless-stopped
profiles:
- vector-search # Only start with: docker compose --profile vector-search up
volumes:
arangodb_data:
@@ -96,3 +138,5 @@ networks:
driver: bridge
txt2kg-network:
driver: bridge
pinecone-net:
name: pinecone