From a0e917e6f576145837c14ae4567ff6992568955f Mon Sep 17 00:00:00 2001
From: GitLab CI <automaton@nvidia.com>
Date: Wed, 10 Jun 2026 22:36:25 +0000
Subject: [PATCH] chore: Regenerate all playbooks

---
 nvidia/sglang/README.md                       | 12 +++----
 .../components/embeddings-generator.tsx       | 30 ++++++++++++++--
 .../frontend/components/model-selector.tsx    |  4 ++-
 .../frontend/contexts/document-context.tsx    | 18 ++++++++--
 .../assets/frontend/lib/langchain-service.ts  |  5 +--
 nvidia/txt2kg/assets/frontend/lib/qdrant.ts   | 35 ++++++++++++++++---
 nvidia/txt2kg/assets/start.sh                 |  4 +++
 7 files changed, 88 insertions(+), 20 deletions(-)

diff --git a/nvidia/sglang/README.md b/nvidia/sglang/README.md
index 60a7f7f..99cef88 100644
--- a/nvidia/sglang/README.md
+++ b/nvidia/sglang/README.md
@@ -39,9 +39,9 @@ vision-language tasks using models like DeepSeek-V2-Lite.
 - NVIDIA Spark device with Blackwell architecture
 - Docker Engine installed and running: `docker --version`
 - NVIDIA GPU drivers installed: `nvidia-smi`
-- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi`
+- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi`
 - Sufficient disk space (>20GB available): `df -h`
-- Network connectivity for pulling containers: `docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36`
+- Network connectivity for pulling containers: `docker pull lmsysorg/sglang:latest-cu130`
 
 ## Ancillary files
 
@@ -103,7 +103,7 @@ docker --version
 nvidia-smi
 
 ## Verify Docker GPU support
-docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi
+docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi
 
 ## Check available disk space
 df -h /
@@ -124,7 +124,7 @@ several minutes depending on your network connection.
 
 ```bash
 ## Pull the SGLang container
-docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36
+docker pull lmsysorg/sglang:latest-cu130
 
 ## Verify the image was downloaded
 docker images | grep sglang
@@ -140,7 +140,7 @@ server inside the container, exposing it on port 30000 for client connections.
 docker run --gpus all -it --rm \
   -p 30000:30000 \
   -v /tmp:/tmp \
-  lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 \
+  lmsysorg/sglang:latest-cu130 \
   bash
 ```
 
@@ -237,7 +237,7 @@ docker ps | grep sglang | awk '{print $1}' | xargs docker stop
 docker container prune -f
 
 ## Remove SGLang images (optional)
-docker rmi lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36
+docker rmi lmsysorg/sglang:latest-cu130
 ```
 
 ## Step 10. Next steps
diff --git a/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx b/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx
index a1313ec..1266218 100644
--- a/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx
+++ b/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx
@@ -226,6 +226,8 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings
           const model = JSON.parse(selectedModel);
           if (model.provider === "ollama") {
             processingMethod = `Ollama ${model.model || 'qwen3:1.7b'}`;
+          } else if (model.provider === "vllm") {
+            processingMethod = `vLLM ${model.model || 'local model'}`;
           } else if (model.id?.startsWith("nvidia-")) {
             processingMethod = 'NVIDIA Nemotron';
           }
@@ -242,14 +244,36 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings
       
       // Call processDocuments with the selected document IDs and processing options
       const useGraphTransformer = useLangChain && langChainMethod === 'graphtransformer';
-      await processDocuments(selectedDocs, {
+      const processingOptions: Parameters<typeof processDocuments>[1] = {
         useLangChain,
         useGraphTransformer,
         promptConfigs: options || undefined,
         chunkSize: options?.chunkSize,
         overlapSize: options?.overlapSize,
         chunkingMethod: options?.chunkingMethod
-      });
+      };
+
+      // Forward the model persisted under the "selectedModel" localStorage key so
+      // processDocuments receives the matching provider settings. Best-effort: a
+      // missing or malformed entry leaves processingOptions without a provider.
+      try {
+        const model = JSON.parse(localStorage.getItem("selectedModel") || "null");
+        if (model?.provider === "ollama") {
+          processingOptions.llmProvider = "ollama";
+          processingOptions.ollamaModel = model.model || "qwen3:1.7b";
+          processingOptions.ollamaBaseUrl = model.baseURL || "http://localhost:11434/v1";
+        } else if (model?.provider === "vllm") {
+          processingOptions.llmProvider = "vllm";
+          processingOptions.vllmModel = model.model;
+          processingOptions.vllmBaseUrl = model.baseURL || "http://localhost:8001/v1";
+        } else if (model?.provider === "nvidia" || model?.id?.startsWith("nvidia-")) {
+          processingOptions.llmProvider = "nvidia";
+        }
+      } catch (e) {
+        console.log("Could not parse selected model, using default extraction provider", e);
+      }
+
+      await processDocuments(selectedDocs, processingOptions);
       
       // Navigate to the edit tab after processing is complete
       setTimeout(() => {
@@ -1265,4 +1289,4 @@ function InfoIcon(props: React.SVGProps<SVGSVGElement>) {
       <path d="M12 8h.01" />
     </svg>
   )
-} 
\ No newline at end of file
+}
diff --git a/nvidia/txt2kg/assets/frontend/components/model-selector.tsx b/nvidia/txt2kg/assets/frontend/components/model-selector.tsx
index 9bf463c..189ab8b 100644
--- a/nvidia/txt2kg/assets/frontend/components/model-selector.tsx
+++ b/nvidia/txt2kg/assets/frontend/components/model-selector.tsx
@@ -151,7 +151,9 @@ export function ModelSelector() {
       
       // Default to first available local model (vLLM or Ollama)
       const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama")
-      setSelectedModel(localModel || availableModels[0])
+      const defaultModel = localModel || availableModels[0]
+      setSelectedModel(defaultModel)
+      if (defaultModel) localStorage.setItem("selectedModel", JSON.stringify(defaultModel)) // skip when no model exists: setItem would store the string "undefined"
     }
     
     setIsLoading(false)
diff --git a/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx b/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx
index ad69d05..0e4c787 100644
--- a/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx
+++ b/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx
@@ -49,7 +49,7 @@ export type Document = {
   }
 }
 
-export type LLMProvider = 'nvidia' | 'ollama';
+export type LLMProvider = 'nvidia' | 'ollama' | 'vllm';
 
 export type ProcessingOptions = {
   useLangChain?: boolean;
@@ -58,6 +58,8 @@ export type ProcessingOptions = {
   llmProvider?: LLMProvider;
   ollamaModel?: string;
   ollamaBaseUrl?: string;
+  vllmModel?: string;
+  vllmBaseUrl?: string;
   chunkSize?: number;
   overlapSize?: number;
   chunkingMethod?: 'optimized' | 'pyg';
@@ -451,6 +453,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
       llmProvider = 'ollama',
       ollamaModel = 'qwen3:1.7b',
       ollamaBaseUrl = 'http://localhost:11434/v1',
+      vllmModel,
+      vllmBaseUrl,
       chunkSize = 64000,
       overlapSize = 2000,
       chunkingMethod = 'optimized'
@@ -460,6 +464,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
       llmProvider,
       ollamaModel,
       ollamaBaseUrl,
+      vllmModel,
+      vllmBaseUrl,
       chunkSize,
       overlapSize,
       chunkingMethod
@@ -485,6 +491,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
       llmProvider?: LLMProvider;
       ollamaModel?: string;
       ollamaBaseUrl?: string;
+      vllmModel?: string;
+      vllmBaseUrl?: string;
       chunkSize?: number;
       overlapSize?: number;
       chunkingMethod?: 'optimized' | 'pyg';
@@ -673,6 +681,12 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
                 if (llmOptions.ollamaBaseUrl) {
                   requestBody.ollamaBaseUrl = llmOptions.ollamaBaseUrl;
                 }
+                if (llmOptions.vllmModel) {
+                  requestBody.vllmModel = llmOptions.vllmModel;
+                }
+                if (llmOptions.vllmBaseUrl) {
+                  requestBody.vllmBaseUrl = llmOptions.vllmBaseUrl;
+                }
               }
               
               // Add prompt configs if available
@@ -1273,4 +1287,4 @@ export function useDocuments() {
     throw new Error("useDocuments must be used within a DocumentProvider")
   }
   return context
-}
\ No newline at end of file
+}
diff --git a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
index f5c9f09..9ed8f2c 100644
--- a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
@@ -290,9 +290,6 @@ export class LangChainService {
         configuration: {
           baseURL: baseURL,
           timeout: 120000, // 2 minute timeout for vLLM inference
-        },
-        modelKwargs: {
-          "response_format": { "type": "text" }
         }
       });
       
@@ -320,4 +317,4 @@ export class LangChainService {
 }
 
 // Export a singleton instance for convenience
-export const langChainService = LangChainService.getInstance(); 
\ No newline at end of file
+export const langChainService = LangChainService.getInstance();
diff --git a/nvidia/txt2kg/assets/frontend/lib/qdrant.ts b/nvidia/txt2kg/assets/frontend/lib/qdrant.ts
index cef0ab2..9ef0bc9 100644
--- a/nvidia/txt2kg/assets/frontend/lib/qdrant.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/qdrant.ts
@@ -153,6 +153,16 @@ export class QdrantService {
         return true;
       }
 
+      const collectionsResponse = await fetch(`${this.hostUrl}/collections`, {
+        method: 'GET'
+      });
+
+      if (collectionsResponse.ok) {
+        console.log(`Qdrant server is reachable`);
+        this.isQdrantRunningCheck = false;
+        return true;
+      }
+
       console.log('Qdrant health check failed - server might not be running');
       this.isQdrantRunningCheck = false;
       return false;
@@ -534,6 +544,21 @@ export class QdrantService {
   public async getStats(): Promise<any> {
     try {
       console.log('Getting stats from Qdrant...');
+      const isRunning = await this.isQdrantRunning();
+      if (!isRunning) {
+        return {
+          totalVectorCount: 0,
+          source: 'qdrant',
+          httpHealthy: false,
+          url: this.hostUrl,
+          error: `Qdrant is not reachable at ${this.hostUrl}. Start vector search with ./start.sh --vector-search if you need Vector DB features.`
+        };
+      }
+
+      if (!this.initialized) {
+        await this.initialize();
+      }
+
       const response = await this.makeRequest(`/collections/${this.collectionName}`, 'GET');
 
       if (response && response.result) {
@@ -554,17 +579,19 @@ export class QdrantService {
         console.log(`Qdrant stats request failed`);
         return {
           totalVectorCount: 0,
-          source: 'error',
-          httpHealthy: false,
-          error: 'Failed to get stats'
+          source: 'qdrant',
+          httpHealthy: true,
+          url: this.hostUrl,
+          error: `Qdrant is reachable, but collection '${this.collectionName}' is not available.`
         };
       }
     } catch (error) {
       console.log('Qdrant connection failed - server may not be running');
       return {
         totalVectorCount: 0,
-        source: 'error',
+        source: 'qdrant',
         httpHealthy: false,
+        url: this.hostUrl,
         error: error instanceof Error ? error.message : String(error)
       };
     }
diff --git a/nvidia/txt2kg/assets/start.sh b/nvidia/txt2kg/assets/start.sh
index e027711..87dfb74 100755
--- a/nvidia/txt2kg/assets/start.sh
+++ b/nvidia/txt2kg/assets/start.sh
@@ -176,6 +176,10 @@ if [ "$USE_VECTOR_SEARCH" = true ]; then
   echo "  • Qdrant: http://localhost:6333"
   echo "  • Sentence Transformers: http://localhost:8000"
   echo ""
+else
+  echo "Vector Search Services: disabled"
+  echo "  • Start with --vector-search to enable Vector DB status and embedding search"
+  echo ""
 fi
 
 echo "Next steps:"