---
# Compose stack for txt2kg: app + ArangoDB + Ollama are the required core.
# sentence-transformers and entity-embeddings are optional (vector-search profile).
services:
  app:
    build:
      context: ../..
      dockerfile: deploy/app/Dockerfile
    ports:
      - "3001:3000"
    environment:
      - ARANGODB_URL=http://arangodb:8529
      - ARANGODB_DB=txt2kg
      - PINECONE_HOST=entity-embeddings
      - PINECONE_PORT=5081
      - PINECONE_API_KEY=pclocal
      - PINECONE_ENVIRONMENT=local
      - LANGCHAIN_TRACING_V2=true
      - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
      - MODEL_NAME=all-MiniLM-L6-v2
      - EMBEDDINGS_API_URL=http://sentence-transformers:80
      - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - OLLAMA_BASE_URL=http://ollama:11434/v1
      - OLLAMA_MODEL=llama3.1:8b
      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Node.js timeout configurations for large model processing
      - NODE_OPTIONS=--max-http-header-size=80000
      - UV_THREADPOOL_SIZE=128
      - HTTP_TIMEOUT=1800000
      - REQUEST_TIMEOUT=1800000
    networks:
      - default
      - txt2kg-network
      - pinecone-net
    depends_on:
      - arangodb
      - ollama
      # Optional: sentence-transformers and entity-embeddings are only needed for vector search
      # Traditional graph search works without these services

  arangodb:
    image: arangodb:latest
    ports:
      - "8529:8529"
    environment:
      - ARANGO_NO_AUTH=1
    volumes:
      - arangodb_data:/var/lib/arangodb3
      - arangodb_apps_data:/var/lib/arangodb3-apps

  # One-shot init job: retries until ArangoDB accepts connections, then
  # creates the txt2kg database (idempotent — tolerates "duplicate").
  arangodb-init:
    image: arangodb:latest
    depends_on:
      arangodb:
        condition: service_started
    restart: on-failure
    # Folded scalar (>) joins these lines with spaces into a single shell command;
    # block scalars do no backslash processing, so \" reaches the shell verbatim.
    entrypoint: >
      sh -c "
      echo 'Waiting for ArangoDB to start...' &&
      sleep 10 &&
      echo 'Creating txt2kg database...' &&
      arangosh --server.endpoint tcp://arangodb:8529
      --server.authentication false
      --javascript.execute-string
      'try { db._createDatabase(\"txt2kg\"); console.log(\"Database txt2kg created successfully!\"); } catch(e) { if(e.message.includes(\"duplicate\")) { console.log(\"Database txt2kg already exists\"); } else { throw e; } }'
      "

  ollama:
    build:
      context: ../services/ollama
      dockerfile: Dockerfile
    image: ollama-custom:latest
    container_name: ollama-compose
    ports:
      - "11434:11434"
    volumes:
      - ollama_data:/root/.ollama
    environment:
      - NVIDIA_VISIBLE_DEVICES=all  # Make all GPUs visible to the container
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility  # Required capabilities for CUDA
      - OLLAMA_FLASH_ATTENTION=1  # Enable flash attention for better performance
      - OLLAMA_KEEP_ALIVE=30m  # Keep models loaded for 30 minutes
      - OLLAMA_NUM_PARALLEL=1  # Process one request at a time for 70B models
      - OLLAMA_MAX_LOADED_MODELS=1  # Load only one model at a time to avoid VRAM contention
      - OLLAMA_KV_CACHE_TYPE=q8_0  # Reduce KV cache VRAM usage with minimal performance impact
    networks:
      - default
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:11434/api/tags"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

  # Optional services for vector search (NOT required for traditional graph search)
  # Traditional graph search works with just: app, arangodb, and ollama
  sentence-transformers:
    build:
      context: ../services/sentence-transformers
      dockerfile: Dockerfile
    ports:
      - "8000:80"
    environment:
      - MODEL_NAME=all-MiniLM-L6-v2
    networks:
      - default
    restart: unless-stopped
    profiles:
      - vector-search  # Only start with: docker compose --profile vector-search up

  entity-embeddings:
    image: ghcr.io/pinecone-io/pinecone-index:latest
    container_name: entity-embeddings
    environment:
      # Values quoted as strings: Compose environment values are strings, and
      # quoting avoids YAML integer coercion in other tooling.
      PORT: "5081"
      INDEX_TYPE: serverless
      VECTOR_TYPE: dense
      DIMENSION: "384"
      METRIC: cosine
      INDEX_NAME: entity-embeddings
    ports:
      - "5081:5081"
    platform: linux/amd64
    networks:
      - pinecone-net
    restart: unless-stopped
    profiles:
      - vector-search  # Only start with: docker compose --profile vector-search up

volumes:
  arangodb_data:
  arangodb_apps_data:
  ollama_data:

networks:
  default:
    driver: bridge
  txt2kg-network:
    driver: bridge
  pinecone-net:
    name: pinecone