From a0e917e6f576145837c14ae4567ff6992568955f Mon Sep 17 00:00:00 2001 From: GitLab CI Date: Wed, 10 Jun 2026 22:36:25 +0000 Subject: [PATCH] chore: Regenerate all playbooks --- nvidia/sglang/README.md | 12 +++---- .../components/embeddings-generator.tsx | 30 ++++++++++++++-- .../frontend/components/model-selector.tsx | 4 ++- .../frontend/contexts/document-context.tsx | 18 ++++++++-- .../assets/frontend/lib/langchain-service.ts | 5 +-- nvidia/txt2kg/assets/frontend/lib/qdrant.ts | 35 ++++++++++++++++--- nvidia/txt2kg/assets/start.sh | 4 +++ 7 files changed, 88 insertions(+), 20 deletions(-) diff --git a/nvidia/sglang/README.md b/nvidia/sglang/README.md index 60a7f7f..99cef88 100644 --- a/nvidia/sglang/README.md +++ b/nvidia/sglang/README.md @@ -39,9 +39,9 @@ vision-language tasks using models like DeepSeek-V2-Lite. - NVIDIA Spark device with Blackwell architecture - Docker Engine installed and running: `docker --version` - NVIDIA GPU drivers installed: `nvidia-smi` -- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi` +- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi` - Sufficient disk space (>20GB available): `df -h` -- Network connectivity for pulling containers: `docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36` +- Network connectivity for pulling containers: `docker pull lmsysorg/sglang:latest-cu130` ## Ancillary files @@ -103,7 +103,7 @@ docker --version nvidia-smi ## Verify Docker GPU support -docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi +docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi ## Check available disk space df -h / @@ -124,7 +124,7 @@ several minutes depending on your network connection. ```bash ## Pull the SGLang container -docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 +docker pull lmsysorg/sglang:latest-cu130 ## Verify the image was downloaded docker images | grep sglang @@ -140,7 +140,7 @@ server inside the container, exposing it on port 30000 for client connections. docker run --gpus all -it --rm \ -p 30000:30000 \ -v /tmp:/tmp \ - lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 \ + lmsysorg/sglang:latest-cu130 \ bash ``` @@ -237,7 +237,7 @@ docker ps | grep sglang | awk '{print $1}' | xargs docker stop docker container prune -f ## Remove SGLang images (optional) -docker rmi lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 +docker rmi lmsysorg/sglang:latest-cu130 ``` ## Step 10. Next steps diff --git a/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx b/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx index a1313ec..1266218 100644 --- a/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx +++ b/nvidia/txt2kg/assets/frontend/components/embeddings-generator.tsx @@ -226,6 +226,8 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings const model = JSON.parse(selectedModel); if (model.provider === "ollama") { processingMethod = `Ollama ${model.model || 'qwen3:1.7b'}`; + } else if (model.provider === "vllm") { + processingMethod = `vLLM ${model.model || 'local model'}`; } else if (model.id?.startsWith("nvidia-")) { processingMethod = 'NVIDIA Nemotron'; } @@ -242,14 +244,36 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings // Call processDocuments with the selected document IDs and processing options const useGraphTransformer = useLangChain && langChainMethod === 'graphtransformer'; - await processDocuments(selectedDocs, { + const processingOptions: Parameters[1] = { useLangChain, useGraphTransformer, promptConfigs: options || undefined, chunkSize: options?.chunkSize, overlapSize: options?.overlapSize, chunkingMethod: options?.chunkingMethod - }); + }; + + try { + const selectedModel = localStorage.getItem("selectedModel"); + if (selectedModel) { + const model = JSON.parse(selectedModel); + if (model.provider === "ollama") { + processingOptions.llmProvider = "ollama"; + processingOptions.ollamaModel = model.model || "qwen3:1.7b"; + processingOptions.ollamaBaseUrl = model.baseURL || "http://localhost:11434/v1"; + } else if (model.provider === "vllm") { + processingOptions.llmProvider = "vllm"; + processingOptions.vllmModel = model.model; + processingOptions.vllmBaseUrl = model.baseURL || "http://localhost:8001/v1"; + } else if (model.provider === "nvidia" || model.id?.startsWith("nvidia-")) { + processingOptions.llmProvider = "nvidia"; + } + } + } catch (e) { + console.log("Could not parse selected model, using default extraction provider"); + } + + await processDocuments(selectedDocs, processingOptions); // Navigate to the edit tab after processing is complete setTimeout(() => { @@ -1265,4 +1289,4 @@ function InfoIcon(props: React.SVGProps) { ) -} \ No newline at end of file +} diff --git a/nvidia/txt2kg/assets/frontend/components/model-selector.tsx b/nvidia/txt2kg/assets/frontend/components/model-selector.tsx index 9bf463c..189ab8b 100644 --- a/nvidia/txt2kg/assets/frontend/components/model-selector.tsx +++ b/nvidia/txt2kg/assets/frontend/components/model-selector.tsx @@ -151,7 +151,9 @@ export function ModelSelector() { // Default to first available local model (vLLM or Ollama) const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama") - setSelectedModel(localModel || availableModels[0]) + const defaultModel = localModel || availableModels[0] + setSelectedModel(defaultModel) + localStorage.setItem("selectedModel", JSON.stringify(defaultModel)) } setIsLoading(false) diff --git a/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx b/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx index ad69d05..0e4c787 100644 --- a/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx +++ b/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx @@ -49,7 +49,7 @@ export type Document = { } } -export type LLMProvider = 'nvidia' | 'ollama'; +export type LLMProvider = 'nvidia' | 'ollama' | 'vllm'; export type ProcessingOptions = { useLangChain?: boolean; @@ -58,6 +58,8 @@ export type ProcessingOptions = { llmProvider?: LLMProvider; ollamaModel?: string; ollamaBaseUrl?: string; + vllmModel?: string; + vllmBaseUrl?: string; chunkSize?: number; overlapSize?: number; chunkingMethod?: 'optimized' | 'pyg'; @@ -451,6 +453,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) { llmProvider = 'ollama', ollamaModel = 'qwen3:1.7b', ollamaBaseUrl = 'http://localhost:11434/v1', + vllmModel, + vllmBaseUrl, chunkSize = 64000, overlapSize = 2000, chunkingMethod = 'optimized' @@ -460,6 +464,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) { llmProvider, ollamaModel, ollamaBaseUrl, + vllmModel, + vllmBaseUrl, chunkSize, overlapSize, chunkingMethod @@ -485,6 +491,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) { llmProvider?: LLMProvider; ollamaModel?: string; ollamaBaseUrl?: string; + vllmModel?: string; + vllmBaseUrl?: string; chunkSize?: number; overlapSize?: number; chunkingMethod?: 'optimized' | 'pyg'; @@ -673,6 +681,12 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) { if (llmOptions.ollamaBaseUrl) { requestBody.ollamaBaseUrl = llmOptions.ollamaBaseUrl; } + if (llmOptions.vllmModel) { + requestBody.vllmModel = llmOptions.vllmModel; + } + if (llmOptions.vllmBaseUrl) { + requestBody.vllmBaseUrl = llmOptions.vllmBaseUrl; + } } // Add prompt configs if available @@ -1273,4 +1287,4 @@ export function useDocuments() { throw new Error("useDocuments must be used within a DocumentProvider") } return context -} \ No newline at end of file +} diff --git a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts index f5c9f09..9ed8f2c 100644 --- a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts +++ b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts @@ -290,9 +290,6 @@ export class LangChainService { configuration: { baseURL: baseURL, timeout: 120000, // 2 minute timeout for vLLM inference - }, - modelKwargs: { - "response_format": { "type": "text" } } }); @@ -320,4 +317,4 @@ export class LangChainService { } // Export a singleton instance for convenience -export const langChainService = LangChainService.getInstance(); \ No newline at end of file +export const langChainService = LangChainService.getInstance(); diff --git a/nvidia/txt2kg/assets/frontend/lib/qdrant.ts b/nvidia/txt2kg/assets/frontend/lib/qdrant.ts index cef0ab2..9ef0bc9 100644 --- a/nvidia/txt2kg/assets/frontend/lib/qdrant.ts +++ b/nvidia/txt2kg/assets/frontend/lib/qdrant.ts @@ -153,6 +153,16 @@ export class QdrantService { return true; } + const collectionsResponse = await fetch(`${this.hostUrl}/collections`, { + method: 'GET' + }); + + if (collectionsResponse.ok) { + console.log(`Qdrant server is reachable`); + this.isQdrantRunningCheck = false; + return true; + } + console.log('Qdrant health check failed - server might not be running'); this.isQdrantRunningCheck = false; return false; @@ -534,6 +544,21 @@ export class QdrantService { public async getStats(): Promise { try { console.log('Getting stats from Qdrant...'); + const isRunning = await this.isQdrantRunning(); + if (!isRunning) { + return { + totalVectorCount: 0, + source: 'qdrant', + httpHealthy: false, + url: this.hostUrl, + error: `Qdrant is not reachable at ${this.hostUrl}. Start vector search with ./start.sh --vector-search if you need Vector DB features.` + }; + } + + if (!this.initialized) { + await this.initialize(); + } + const response = await this.makeRequest(`/collections/${this.collectionName}`, 'GET'); if (response && response.result) { @@ -554,17 +579,19 @@ export class QdrantService { console.log(`Qdrant stats request failed`); return { totalVectorCount: 0, - source: 'error', - httpHealthy: false, - error: 'Failed to get stats' + source: 'qdrant', + httpHealthy: true, + url: this.hostUrl, + error: `Qdrant is reachable, but collection '${this.collectionName}' is not available.` }; } } catch (error) { console.log('Qdrant connection failed - server may not be running'); return { totalVectorCount: 0, - source: 'error', + source: 'qdrant', httpHealthy: false, + url: this.hostUrl, error: error instanceof Error ? error.message : String(error) }; } diff --git a/nvidia/txt2kg/assets/start.sh b/nvidia/txt2kg/assets/start.sh index e027711..87dfb74 100755 --- a/nvidia/txt2kg/assets/start.sh +++ b/nvidia/txt2kg/assets/start.sh @@ -176,6 +176,10 @@ if [ "$USE_VECTOR_SEARCH" = true ]; then echo " • Qdrant: http://localhost:6333" echo " • Sentence Transformers: http://localhost:8000" echo "" +else + echo "Vector Search Services: disabled" + echo " • Start with --vector-search to enable Vector DB status and embedding search" + echo "" fi echo "Next steps:"