chore: Regenerate all playbooks

This commit is contained in:
GitLab CI 2026-06-10 22:36:25 +00:00
parent 2f703e1793
commit a0e917e6f5
7 changed files with 88 additions and 20 deletions

View File

@ -39,9 +39,9 @@ vision-language tasks using models like DeepSeek-V2-Lite.
- NVIDIA Spark device with Blackwell architecture - NVIDIA Spark device with Blackwell architecture
- Docker Engine installed and running: `docker --version` - Docker Engine installed and running: `docker --version`
- NVIDIA GPU drivers installed: `nvidia-smi` - NVIDIA GPU drivers installed: `nvidia-smi`
- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi` - NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi`
- Sufficient disk space (>20GB available): `df -h` - Sufficient disk space (>20GB available): `df -h`
- Network connectivity for pulling containers: `docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36` - Network connectivity for pulling containers: `docker pull lmsysorg/sglang:latest-cu130`
## Ancillary files ## Ancillary files
@ -103,7 +103,7 @@ docker --version
nvidia-smi nvidia-smi
## Verify Docker GPU support ## Verify Docker GPU support
docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi
## Check available disk space ## Check available disk space
df -h / df -h /
@ -124,7 +124,7 @@ several minutes depending on your network connection.
```bash ```bash
## Pull the SGLang container ## Pull the SGLang container
docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 docker pull lmsysorg/sglang:latest-cu130
## Verify the image was downloaded ## Verify the image was downloaded
docker images | grep sglang docker images | grep sglang
@ -140,7 +140,7 @@ server inside the container, exposing it on port 30000 for client connections.
docker run --gpus all -it --rm \ docker run --gpus all -it --rm \
-p 30000:30000 \ -p 30000:30000 \
-v /tmp:/tmp \ -v /tmp:/tmp \
lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 \ lmsysorg/sglang:latest-cu130 \
bash bash
``` ```
@ -237,7 +237,7 @@ docker ps | grep sglang | awk '{print $1}' | xargs docker stop
docker container prune -f docker container prune -f
## Remove SGLang images (optional) ## Remove SGLang images (optional)
docker rmi lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 docker rmi lmsysorg/sglang:latest-cu130
``` ```
## Step 10. Next steps ## Step 10. Next steps

View File

@ -226,6 +226,8 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings
const model = JSON.parse(selectedModel); const model = JSON.parse(selectedModel);
if (model.provider === "ollama") { if (model.provider === "ollama") {
processingMethod = `Ollama ${model.model || 'qwen3:1.7b'}`; processingMethod = `Ollama ${model.model || 'qwen3:1.7b'}`;
} else if (model.provider === "vllm") {
processingMethod = `vLLM ${model.model || 'local model'}`;
} else if (model.id?.startsWith("nvidia-")) { } else if (model.id?.startsWith("nvidia-")) {
processingMethod = 'NVIDIA Nemotron'; processingMethod = 'NVIDIA Nemotron';
} }
@ -242,14 +244,36 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings
// Call processDocuments with the selected document IDs and processing options // Call processDocuments with the selected document IDs and processing options
const useGraphTransformer = useLangChain && langChainMethod === 'graphtransformer'; const useGraphTransformer = useLangChain && langChainMethod === 'graphtransformer';
await processDocuments(selectedDocs, { const processingOptions: Parameters<typeof processDocuments>[1] = {
useLangChain, useLangChain,
useGraphTransformer, useGraphTransformer,
promptConfigs: options || undefined, promptConfigs: options || undefined,
chunkSize: options?.chunkSize, chunkSize: options?.chunkSize,
overlapSize: options?.overlapSize, overlapSize: options?.overlapSize,
chunkingMethod: options?.chunkingMethod chunkingMethod: options?.chunkingMethod
}); };
try {
const selectedModel = localStorage.getItem("selectedModel");
if (selectedModel) {
const model = JSON.parse(selectedModel);
if (model.provider === "ollama") {
processingOptions.llmProvider = "ollama";
processingOptions.ollamaModel = model.model || "qwen3:1.7b";
processingOptions.ollamaBaseUrl = model.baseURL || "http://localhost:11434/v1";
} else if (model.provider === "vllm") {
processingOptions.llmProvider = "vllm";
processingOptions.vllmModel = model.model;
processingOptions.vllmBaseUrl = model.baseURL || "http://localhost:8001/v1";
} else if (model.provider === "nvidia" || model.id?.startsWith("nvidia-")) {
processingOptions.llmProvider = "nvidia";
}
}
} catch (e) {
console.log("Could not parse selected model, using default extraction provider");
}
await processDocuments(selectedDocs, processingOptions);
// Navigate to the edit tab after processing is complete // Navigate to the edit tab after processing is complete
setTimeout(() => { setTimeout(() => {
@ -1265,4 +1289,4 @@ function InfoIcon(props: React.SVGProps<SVGSVGElement>) {
<path d="M12 8h.01" /> <path d="M12 8h.01" />
</svg> </svg>
) )
} }

View File

@ -151,7 +151,9 @@ export function ModelSelector() {
// Default to first available local model (vLLM or Ollama) // Default to first available local model (vLLM or Ollama)
const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama") const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama")
setSelectedModel(localModel || availableModels[0]) const defaultModel = localModel || availableModels[0]
setSelectedModel(defaultModel)
localStorage.setItem("selectedModel", JSON.stringify(defaultModel))
} }
setIsLoading(false) setIsLoading(false)

View File

@ -49,7 +49,7 @@ export type Document = {
} }
} }
export type LLMProvider = 'nvidia' | 'ollama'; export type LLMProvider = 'nvidia' | 'ollama' | 'vllm';
export type ProcessingOptions = { export type ProcessingOptions = {
useLangChain?: boolean; useLangChain?: boolean;
@ -58,6 +58,8 @@ export type ProcessingOptions = {
llmProvider?: LLMProvider; llmProvider?: LLMProvider;
ollamaModel?: string; ollamaModel?: string;
ollamaBaseUrl?: string; ollamaBaseUrl?: string;
vllmModel?: string;
vllmBaseUrl?: string;
chunkSize?: number; chunkSize?: number;
overlapSize?: number; overlapSize?: number;
chunkingMethod?: 'optimized' | 'pyg'; chunkingMethod?: 'optimized' | 'pyg';
@ -451,6 +453,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
llmProvider = 'ollama', llmProvider = 'ollama',
ollamaModel = 'qwen3:1.7b', ollamaModel = 'qwen3:1.7b',
ollamaBaseUrl = 'http://localhost:11434/v1', ollamaBaseUrl = 'http://localhost:11434/v1',
vllmModel,
vllmBaseUrl,
chunkSize = 64000, chunkSize = 64000,
overlapSize = 2000, overlapSize = 2000,
chunkingMethod = 'optimized' chunkingMethod = 'optimized'
@ -460,6 +464,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
llmProvider, llmProvider,
ollamaModel, ollamaModel,
ollamaBaseUrl, ollamaBaseUrl,
vllmModel,
vllmBaseUrl,
chunkSize, chunkSize,
overlapSize, overlapSize,
chunkingMethod chunkingMethod
@ -485,6 +491,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
llmProvider?: LLMProvider; llmProvider?: LLMProvider;
ollamaModel?: string; ollamaModel?: string;
ollamaBaseUrl?: string; ollamaBaseUrl?: string;
vllmModel?: string;
vllmBaseUrl?: string;
chunkSize?: number; chunkSize?: number;
overlapSize?: number; overlapSize?: number;
chunkingMethod?: 'optimized' | 'pyg'; chunkingMethod?: 'optimized' | 'pyg';
@ -673,6 +681,12 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
if (llmOptions.ollamaBaseUrl) { if (llmOptions.ollamaBaseUrl) {
requestBody.ollamaBaseUrl = llmOptions.ollamaBaseUrl; requestBody.ollamaBaseUrl = llmOptions.ollamaBaseUrl;
} }
if (llmOptions.vllmModel) {
requestBody.vllmModel = llmOptions.vllmModel;
}
if (llmOptions.vllmBaseUrl) {
requestBody.vllmBaseUrl = llmOptions.vllmBaseUrl;
}
} }
// Add prompt configs if available // Add prompt configs if available
@ -1273,4 +1287,4 @@ export function useDocuments() {
throw new Error("useDocuments must be used within a DocumentProvider") throw new Error("useDocuments must be used within a DocumentProvider")
} }
return context return context
} }

View File

@ -290,9 +290,6 @@ export class LangChainService {
configuration: { configuration: {
baseURL: baseURL, baseURL: baseURL,
timeout: 120000, // 2 minute timeout for vLLM inference timeout: 120000, // 2 minute timeout for vLLM inference
},
modelKwargs: {
"response_format": { "type": "text" }
} }
}); });
@ -320,4 +317,4 @@ export class LangChainService {
} }
// Export a singleton instance for convenience // Export a singleton instance for convenience
export const langChainService = LangChainService.getInstance(); export const langChainService = LangChainService.getInstance();

View File

@ -153,6 +153,16 @@ export class QdrantService {
return true; return true;
} }
const collectionsResponse = await fetch(`${this.hostUrl}/collections`, {
method: 'GET'
});
if (collectionsResponse.ok) {
console.log(`Qdrant server is reachable`);
this.isQdrantRunningCheck = false;
return true;
}
console.log('Qdrant health check failed - server might not be running'); console.log('Qdrant health check failed - server might not be running');
this.isQdrantRunningCheck = false; this.isQdrantRunningCheck = false;
return false; return false;
@ -534,6 +544,21 @@ export class QdrantService {
public async getStats(): Promise<any> { public async getStats(): Promise<any> {
try { try {
console.log('Getting stats from Qdrant...'); console.log('Getting stats from Qdrant...');
const isRunning = await this.isQdrantRunning();
if (!isRunning) {
return {
totalVectorCount: 0,
source: 'qdrant',
httpHealthy: false,
url: this.hostUrl,
error: `Qdrant is not reachable at ${this.hostUrl}. Start vector search with ./start.sh --vector-search if you need Vector DB features.`
};
}
if (!this.initialized) {
await this.initialize();
}
const response = await this.makeRequest(`/collections/${this.collectionName}`, 'GET'); const response = await this.makeRequest(`/collections/${this.collectionName}`, 'GET');
if (response && response.result) { if (response && response.result) {
@ -554,17 +579,19 @@ export class QdrantService {
console.log(`Qdrant stats request failed`); console.log(`Qdrant stats request failed`);
return { return {
totalVectorCount: 0, totalVectorCount: 0,
source: 'error', source: 'qdrant',
httpHealthy: false, httpHealthy: true,
error: 'Failed to get stats' url: this.hostUrl,
error: `Qdrant is reachable, but collection '${this.collectionName}' is not available.`
}; };
} }
} catch (error) { } catch (error) {
console.log('Qdrant connection failed - server may not be running'); console.log('Qdrant connection failed - server may not be running');
return { return {
totalVectorCount: 0, totalVectorCount: 0,
source: 'error', source: 'qdrant',
httpHealthy: false, httpHealthy: false,
url: this.hostUrl,
error: error instanceof Error ? error.message : String(error) error: error instanceof Error ? error.message : String(error)
}; };
} }

View File

@ -176,6 +176,10 @@ if [ "$USE_VECTOR_SEARCH" = true ]; then
echo " • Qdrant: http://localhost:6333" echo " • Qdrant: http://localhost:6333"
echo " • Sentence Transformers: http://localhost:8000" echo " • Sentence Transformers: http://localhost:8000"
echo "" echo ""
else
echo "Vector Search Services: disabled"
echo " • Start with --vector-search to enable Vector DB status and embedding search"
echo ""
fi fi
echo "Next steps:" echo "Next steps:"