+
-
- {inferenceStats.tokensPerSecond > 0 && (
-
- {inferenceStats.tokensPerSecond} tokens/sec
-
- )}
);
}
diff --git a/nvidia/multi-agent-chatbot/assets/frontend/src/components/Sidebar.tsx b/nvidia/multi-agent-chatbot/assets/frontend/src/components/Sidebar.tsx
index 2af2f87..dd739f1 100644
--- a/nvidia/multi-agent-chatbot/assets/frontend/src/components/Sidebar.tsx
+++ b/nvidia/multi-agent-chatbot/assets/frontend/src/components/Sidebar.tsx
@@ -165,7 +165,7 @@ export default function Sidebar({
}, [refreshTrigger, fetchSources]);
// Add function to fetch chat metadata
- const fetchChatMetadata = async (chatId: string) => {
+ const fetchChatMetadata = useCallback(async (chatId: string) => {
try {
const response = await fetch(`/api/chat/${chatId}/metadata`);
if (response.ok) {
@@ -178,7 +178,7 @@ export default function Sidebar({
} catch (error) {
console.error(`Error fetching metadata for chat ${chatId}:`, error);
}
- };
+ }, []);
// Update fetchChats to also fetch metadata
const fetchChats = useCallback(async () => {
@@ -201,7 +201,7 @@ export default function Sidebar({
} finally {
setIsLoadingChats(false);
}
- }, []);
+ }, [fetchChatMetadata]);
// Fetch chats when history section is expanded
useEffect(() => {
diff --git a/nvidia/multi-agent-chatbot/assets/frontend/src/styles/QuerySection.module.css b/nvidia/multi-agent-chatbot/assets/frontend/src/styles/QuerySection.module.css
index c4c95b6..df7d006 100644
--- a/nvidia/multi-agent-chatbot/assets/frontend/src/styles/QuerySection.module.css
+++ b/nvidia/multi-agent-chatbot/assets/frontend/src/styles/QuerySection.module.css
@@ -346,11 +346,12 @@
.inputContainer {
display: flex;
- padding: 12px 16px;
+ padding: 12px 16px 4px 16px;
position: relative;
justify-content: center;
align-items: center;
background-color: #f9fafb;
+ margin-top: 0;
margin-bottom: 0;
width: 100%;
opacity: 0;
@@ -473,7 +474,7 @@
}
}
-.sendButton.show, .streamingCancelButton.show, .uploadButton.show {
+.sendButton.show, .streamingCancelButton.show {
animation: buttonAppear 0.3s ease forwards;
}
@@ -559,21 +560,6 @@
}
}
-.inferenceStats {
- position: fixed;
- bottom: 10px;
- right: 10px;
- background-color: rgba(0, 0, 0, 0.6);
- color: white;
- padding: 4px 8px;
- border-radius: 4px;
- font-size: 12px;
- font-family: var(--font-geist-mono);
- z-index: 10;
- pointer-events: none;
- opacity: 0.8;
-}
-
.querySection {
display: flex;
flex-direction: column;
@@ -665,97 +651,6 @@
40% { transform: scale(1); }
}
-.uploadButton {
- width: 36px;
- height: 36px;
- border-radius: 50%;
- display: flex;
- align-items: center;
- justify-content: center;
- border: none;
- cursor: pointer;
- background-color: transparent;
- color: #76B900;
- transition: all 0.2s ease;
- padding: 0;
- opacity: 0;
- visibility: hidden;
- position: absolute;
- left: 8px;
- top: 50%;
- transform: translateY(-50%);
- z-index: 2;
-}
-
-.uploadButton.show {
- animation: buttonAppear 0.3s ease forwards;
-}
-
-.uploadButton:hover {
- transform: translateY(-50%) scale(1.1);
- color: #669f00;
-}
-
-:global(.dark) .uploadButton {
- color: #76B900;
-}
-
-:global(.dark) .uploadButton:hover {
- color: #669f00;
-}
-
-.inputWrapper.dragging {
- border: 2px dashed #76B900;
- background-color: rgba(118, 185, 0, 0.1);
-}
-
-.imagePreview {
- position: relative;
- width: 150px;
- height: 100px;
- display: flex;
- align-items: center;
- justify-content: center;
-}
-
-/* :global(.dark) .imagePreview {
- background: #1f2937;
-} */
-
-.previewImage {
- width: 100%;
- height: 100%;
- object-fit: cover;
- display: block;
- border-radius: 8px;
-}
-
-.removeImageButton {
- position: absolute;
- top: -2px;
- right: -2px;
- width: 20px;
- height: 20px;
- border-radius: 50%;
- background: rgba(0, 0, 0, 0.251); /* red with 50% opacity */
- color: white;
- border: none;
- cursor: pointer;
- display: flex;
- align-items: center;
- justify-content: center;
- font-size: 12px;
- font-weight: bold;
- transition: background-color 0.2s ease;
-}
-
-.removeImageButton:hover {
- background: #e53e3e;
-}
-/* NEW CODE */
-
-
-
/* Adding styles for tool calling interface */
.toolCallingWrapper {
display: flex;
@@ -1033,10 +928,10 @@
.disclaimer {
text-align: center;
- font-size: 12px;
+ font-size: 10px;
color: #6b7280;
padding: 2px 16px;
- margin: -16px auto 0 auto;
+ margin: -12px auto 0 auto;
max-width: 600px;
line-height: 1.4;
opacity: 0.8;
@@ -1047,6 +942,15 @@
color: #9ca3af;
}
+.info {
+ color: #3b82f6;
+ font-weight: 500;
+}
+
+:global(.dark) .info {
+ color: #60a5fa;
+}
+
.warning {
color: #f59e0b;
font-weight: 500;
diff --git a/nvidia/vlm-finetuning/assets/Dockerfile b/nvidia/vlm-finetuning/assets/Dockerfile
index 68ffe29..e8a5458 100644
--- a/nvidia/vlm-finetuning/assets/Dockerfile
+++ b/nvidia/vlm-finetuning/assets/Dockerfile
@@ -159,10 +159,25 @@ RUN git clone https://github.com/triton-lang/triton.git && \
# install xformers from source for blackwell support
RUN git clone --depth=1 https://github.com/facebookresearch/xformers --recursive && \
cd xformers && \
+ git checkout 5146f2ab37b2163985c19fb4e8fbf6183e82f8ce && \
export TORCH_CUDA_ARCH_LIST="12.1" && \
python setup.py install && \
cd ..
# install unsloth without depedencies so we can build them from source
-RUN pip install unsloth unsloth_zoo bitsandbytes==0.48.0
+RUN pip install unsloth==2025.9.11 unsloth_zoo==2025.9.14 bitsandbytes==0.48.0
-CMD ["/bin/bash"]
\ No newline at end of file
+CMD ["/bin/bash"]
+
+
+# docker run \
+# --rm \
+# --gpus=all \
+# --ipc=host \
+# --net=host \
+# --ulimit memlock=-1 \
+# --ulimit stack=67108864 \
+# -w $(pwd) \
+# -v $(pwd):$(pwd) \
+# -v $HOME/.cache/huggingface:/root/.cache/huggingface \
+# nvcr.io/nvidia/vllm:25.09-py3 \
+# vllm serve "unsloth/Qwen2.5-VL-7B-Instruct" --port "8000" --served-model-name "base-model" --max-model-len 16384 --gpu-memory-utilization 0.3 --async-scheduling --enable_prefix_caching
\ No newline at end of file
diff --git a/nvidia/vlm-finetuning/assets/README.md b/nvidia/vlm-finetuning/assets/README.md
index 3f16a23..e4a5bf1 100644
--- a/nvidia/vlm-finetuning/assets/README.md
+++ b/nvidia/vlm-finetuning/assets/README.md
@@ -26,18 +26,13 @@ docker build --build-arg HF_TOKEN=$HF_TOKEN -t vlm_demo .
### 2. Launch the Container
```bash
-# Run the container with GPU support
-docker run -it \
- --gpus=all \
- --net=host \
- --ipc=host \
- --ulimit memlock=-1 \
- --ulimit stack=67108864 \
- -v $(pwd):/vlm_finetuning \
- -v $HOME/.cache/huggingface:/root/.cache/huggingface \
- vlm_demo
+# Enter the assets directory first: launch.sh mounts the current directory at /vlm_finetuning
+cd vlm-finetuning/assets
-# Enter the mounted directory
+# Run the container with GPU support
+sh launch.sh
+
+# Enter the mounted directory within the container
cd /vlm_finetuning
```
diff --git a/nvidia/vlm-finetuning/assets/launch.sh b/nvidia/vlm-finetuning/assets/launch.sh
index d7159cd..68ba681 100755
--- a/nvidia/vlm-finetuning/assets/launch.sh
+++ b/nvidia/vlm-finetuning/assets/launch.sh
@@ -20,7 +20,12 @@ docker run -it \
--gpus=all \
--net=host \
--ipc=host \
- -w $HOME \
- -v $HOME:$HOME \
+ --ulimit memlock=-1 \
+ --ulimit stack=67108864 \
+ -e HOST_HOME=$HOME \
+ -e HOST_PWD=$(pwd) \
+ -v $(pwd):/vlm_finetuning \
-v $HOME/.cache/huggingface:/root/.cache/huggingface \
+ -v /var/run/docker.sock:/var/run/docker.sock \
+ -v /usr/bin/docker:/usr/bin/docker \
vlm_demo
diff --git a/nvidia/vlm-finetuning/assets/ui_image/Image_VLM.py b/nvidia/vlm-finetuning/assets/ui_image/Image_VLM.py
index 22ef366..f343014 100644
--- a/nvidia/vlm-finetuning/assets/ui_image/Image_VLM.py
+++ b/nvidia/vlm-finetuning/assets/ui_image/Image_VLM.py
@@ -15,20 +15,20 @@
# limitations under the License.
#
-from unsloth import FastVisionModel
-
import os
import re
-import gc
+import json
import yaml
import glob
+import time
+import base64
import random
+import requests
import subprocess
-import wandb
-import torch
-from PIL import Image
+import pandas as pd
import streamlit as st
+from transformers.trainer_utils import get_last_checkpoint
REASONING_START = "