###############################################################################
# Clinical Intelligence — Docker Compose
#
# Manages the infrastructure layer: LLM inference and protein structure
# prediction. OpenShell + OpenClaw run on the host.
#
# Quick start:
#   cp .env.example .env   # fill in NGC_API_KEY
#   make up                # start all services
#   make setup             # create sandbox + deploy config
#   make test              # run test suite (levels 1-3)
#   make test-full         # run all levels including agent tests
#
# Individual services:
#   docker compose up ollama -d      # just the LLM
#   docker compose up openfold3 -d   # just protein prediction
#   docker compose logs -f ollama    # watch Ollama logs
###############################################################################

services:
  # ── Ollama (local LLM inference) ──────────────────────────────────
  # GPU pinning: LLM_GPU defaults to "0". On dual-GPU stations (e.g. RTX PRO
  # 6000 + GB300), set LLM_GPU in .env to the GB300 index — the RTX PRO 6000
  # is too small (98 GB) for Nemotron-3-Super (~94 GB resident).
  # Find the GB300 index with:
  #   nvidia-smi --query-gpu=index,name --format=csv,noheader | awk -F', ' '/GB300/{print $1; exit}'
  ollama:
    image: ollama/ollama:latest
    ports:
      # Host port is configurable via OLLAMA_PORT; the container port is fixed.
      - "${OLLAMA_PORT:-11434}:11434"
    volumes:
      # Named volume so pulled models survive container re-creation.
      - ollama-data:/root/.ollama
    environment:
      # Listen on all interfaces so sibling containers can reach the API.
      OLLAMA_HOST: "0.0.0.0"
      OLLAMA_KEEP_ALIVE: "4h"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["${LLM_GPU:-0}"]
              capabilities: [gpu]
    healthcheck:
      # Healthy once the CLI can talk to the server. model-pull and test both
      # wait on this condition.
      test: ["CMD", "ollama", "list"]
      interval: 10s
      timeout: 5s
      retries: 30
      start_period: 30s
    restart: unless-stopped

  # ── Model puller (one-shot: pulls the model if missing) ──────────
  # Runs the ollama CLI against the `ollama` service (OLLAMA_HOST below) and
  # exits; `restart: "no"` keeps it from being restarted afterwards.
  model-pull:
    image: ollama/ollama:latest
    depends_on:
      ollama:
        condition: service_healthy
    entrypoint: ["sh", "-c"]
    command:
      # ${OLLAMA_MODEL:-…} is substituted by Compose before the shell runs.
      # grep uses -F so the model tag is matched literally: with plain -q the
      # tag is a regex, and a "." in a tag (e.g. "llama3.1:8b") would match any
      # character and could wrongly skip the pull. "--" guards against a tag
      # that starts with "-".
      - |
        if ! ollama list 2>/dev/null | grep -qF -- "${OLLAMA_MODEL:-nemotron-3-super:120b-a12b}"; then
          echo "Pulling ${OLLAMA_MODEL:-nemotron-3-super:120b-a12b}..."
          ollama pull "${OLLAMA_MODEL:-nemotron-3-super:120b-a12b}"
        else
          echo "Model already available."
        fi
    environment:
      OLLAMA_HOST: "http://ollama:11434"
    restart: "no"

  # ── OpenFold3 NIM (protein structure prediction) ──────────────────
  # GPU pinning: OPENFOLD_GPU defaults to "0". OpenFold3's PyTorch backend
  # crashes on multi-GPU containers with:
  #   "device >= 0 && device < num_gpus INTERNAL ASSERT FAILED"
  # Pinning to a single device avoids the crash loop. Set OPENFOLD_GPU in
  # .env to share or separate from the LLM GPU.
  # Image pull requires `docker login nvcr.io` first (see `make ngc-login`
  # or instructions.md Step 2). NGC_API_KEY in .env alone is not enough —
  # NGC_API_KEY is the runtime credential; docker login is the pull credential.
  openfold3:
    image: nvcr.io/nim/openfold/openfold3:latest
    ports:
      - "${OPENFOLD_PORT:-8000}:8000"
    environment:
      # `:?` makes Compose abort with this message when the variable is unset
      # or empty.
      # NOTE(review): Compose interpolates the whole file, so this check
      # presumably also fires for `docker compose up ollama -d` — confirm that
      # this is intended for LLM-only use.
      NGC_API_KEY: ${NGC_API_KEY:?Set NGC_API_KEY in .env}
      NIM_OPTIMIZED_BACKEND: torch_baseline
    shm_size: 16g
    ulimits:
      memlock: -1
      stack: 67108864
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["${OPENFOLD_GPU:-0}"]
              capabilities: [gpu]
    healthcheck:
      # Polls the readiness endpoint from inside the container; failures
      # during the 180s start period do not count against the retry budget.
      test: ["CMD", "curl", "-sf", "http://localhost:8000/v1/health/ready"]
      interval: 30s
      timeout: 10s
      retries: 20
      start_period: 180s
    restart: unless-stopped

  # ── Test runner ───────────────────────────────────────────────────
  # Only started when the `test` profile is enabled (or the service is named
  # explicitly). Waits for a healthy `ollama`; it does not wait on openfold3
  # or model-pull.
  test:
    build:
      context: .
      dockerfile: docker/test/Dockerfile
    environment:
      # Service names double as hostnames on the Compose network.
      OLLAMA_HOST: "ollama"
      OPENFOLD_HOST: "openfold3"
      SANDBOX_NAME: "${SANDBOX_NAME:-clinical-sandbox}"
    volumes:
      # Test output is written back to the host checkout.
      - ./test-results:/app/test-results
      # Host Docker socket, bind-mounted read-only.
      - /var/run/docker.sock:/var/run/docker.sock:ro
    depends_on:
      ollama:
        condition: service_healthy
    profiles:
      - test
    entrypoint: ["bash", "scripts/test-all.sh"]
    command: ["--level", "3", "--verbose"]

volumes:
  ollama-data: