---
# Compose definition for a single service, `trtllm`, run as two replicas that
# each reserve one NVIDIA_GPU generic resource and attach to the pre-existing
# external network named `host`.
# NOTE(review): the `deploy:` section (replicas / generic_resources) suggests
# this file is meant for `docker stack deploy` on a Swarm — confirm.
version: '3.8'

services:
  trtllm:
    image: nvcr.io/nvidia/tensorrt-llm/release:1.0.0rc3

    deploy:
      replicas: 2
      # Restart a task whatever its exit condition, waiting 5s between
      # attempts and giving up after 3 attempts; see the Compose `deploy`
      # reference for the exact meaning of `window`.
      restart_policy:
        condition: any
        delay: 5s
        max_attempts: 3
        window: 120s
      resources:
        reservations:
          # One GPU per replica, requested as a generic resource.
          # NOTE(review): presumably each node must advertise a matching
          # `NVIDIA_GPU` generic resource — verify the daemon configuration.
          generic_resources:
            - discrete_resource_spec:
                kind: 'NVIDIA_GPU'
                value: 1

    environment:
      # UCX, NCCL and Open MPI are all given the same pair of interface names.
      # NOTE(review): these names are host-specific — confirm they match the
      # interfaces present on every target node.
      - UCX_NET_DEVICES=enp1s0f0np0,enp1s0f1np1
      - NCCL_SOCKET_IFNAME=enp1s0f0np0,enp1s0f1np1
      - OMPI_MCA_btl_tcp_if_include=enp1s0f0np0,enp1s0f1np1
      # /etc/openmpi-hostfile is not mounted by this file.
      # NOTE(review): presumably the entrypoint script creates it — confirm.
      - OMPI_MCA_orte_default_hostfile=/etc/openmpi-hostfile
      - OMPI_MCA_rmaps_ppr_n_pernode=1
      - OMPI_ALLOW_RUN_AS_ROOT=1
      - OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1

    # The entrypoint script is supplied from the host by the second bind mount
    # below.
    entrypoint: /opt/trtllm-mn-entrypoint.sh

    volumes:
      # All three sources are paths under the deploying user's home directory.
      # NOTE(review): `~` expansion depends on the tool that consumes this
      # file — verify it resolves to the intended directory on every node.
      - ~/.cache/huggingface/:/root/.cache/huggingface/
      - ~/trtllm-mn-entrypoint.sh:/opt/trtllm-mn-entrypoint.sh
      # Mounted read-only at /tmp/.ssh rather than at /root/.ssh.
      - ~/.ssh:/tmp/.ssh:ro

    ulimits:
      memlock: -1
      # 67108864 = 64 * 1024 * 1024
      stack: 67108864

    networks:
      - host

networks:
  # `external: true` — this file references the network named `host` but does
  # not create it.
  host:
    name: host
    external: true