From be2db4604f022e64157c74b5c2b2b8fa9292f0d1 Mon Sep 17 00:00:00 2001
From: Ev Lacey <github@everettlacey.com>
Date: Tue, 2 Dec 2025 16:25:52 -0800
Subject: [PATCH] Update vLLM container version to 25.11-py3

---
 nvidia/vllm/README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/nvidia/vllm/README.md b/nvidia/vllm/README.md
index 25e733c..efc1bed 100644
--- a/nvidia/vllm/README.md
+++ b/nvidia/vllm/README.md
@@ -57,9 +57,9 @@ support for ARM64.
 
 ## Step 1. Pull vLLM container image
 
-Find the latest container build from https://catalog.ngc.nvidia.com/orgs/nvidia/containers/vllm?version=25.09-py3
+Find the latest container build from https://catalog.ngc.nvidia.com/orgs/nvidia/containers/vllm?version=25.11-py3
 ```
-docker pull nvcr.io/nvidia/vllm:25.09-py3
+docker pull nvcr.io/nvidia/vllm:25.11-py3
 ```
 
 ## Step 2. Test vLLM in container
@@ -68,7 +68,7 @@ Launch the container and start vLLM server with a test model to verify basic fun
 
 ```bash
 docker run -it --gpus all -p 8000:8000 \
-nvcr.io/nvidia/vllm:25.09-py3 \
+nvcr.io/nvidia/vllm:25.11-py3 \
 vllm serve "Qwen/Qwen2.5-Math-1.5B-Instruct"
 ```
 
@@ -96,7 +96,7 @@ Expected response should contain `"content": "204"` or similar mathematical calc
 For container approach (non-destructive):
 
 ```bash
-docker rm $(docker ps -aq --filter ancestor=nvcr.io/nvidia/vllm:25.09-py3)
+docker rm $(docker ps -aq --filter ancestor=nvcr.io/nvidia/vllm:25.11-py3)
-docker rmi nvcr.io/nvidia/vllm
+docker rmi nvcr.io/nvidia/vllm:25.11-py3
 ```
 
@@ -150,8 +150,8 @@ After this, you should be able to run docker commands without using `sudo`.
 
 
 ```bash
-docker pull nvcr.io/nvidia/vllm:25.09-py3
-export VLLM_IMAGE=nvcr.io/nvidia/vllm:25.09-py3
+docker pull nvcr.io/nvidia/vllm:25.11-py3
+export VLLM_IMAGE=nvcr.io/nvidia/vllm:25.11-py3
 ```