mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-06-18 04:22:21 +00:00
chore: Regenerate all playbooks
This commit is contained in:
parent
2f703e1793
commit
a0e917e6f5
@ -39,9 +39,9 @@ vision-language tasks using models like DeepSeek-V2-Lite.
|
||||
- NVIDIA Spark device with Blackwell architecture
|
||||
- Docker Engine installed and running: `docker --version`
|
||||
- NVIDIA GPU drivers installed: `nvidia-smi`
|
||||
- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi`
|
||||
- NVIDIA Container Toolkit configured: `docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi`
|
||||
- Sufficient disk space (>20GB available): `df -h`
|
||||
- Network connectivity for pulling containers: `docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36`
|
||||
- Network connectivity for pulling containers: `docker pull lmsysorg/sglang:latest-cu130`
|
||||
|
||||
## Ancillary files
|
||||
|
||||
@ -103,7 +103,7 @@ docker --version
|
||||
nvidia-smi
|
||||
|
||||
## Verify Docker GPU support
|
||||
docker run --rm --gpus all lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 nvidia-smi
|
||||
docker run --rm --gpus all lmsysorg/sglang:latest-cu130 nvidia-smi
|
||||
|
||||
## Check available disk space
|
||||
df -h /
|
||||
@ -124,7 +124,7 @@ several minutes depending on your network connection.
|
||||
|
||||
```bash
|
||||
## Pull the SGLang container
|
||||
docker pull lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36
|
||||
docker pull lmsysorg/sglang:latest-cu130
|
||||
|
||||
## Verify the image was downloaded
|
||||
docker images | grep sglang
|
||||
@ -140,7 +140,7 @@ server inside the container, exposing it on port 30000 for client connections.
|
||||
docker run --gpus all -it --rm \
|
||||
-p 30000:30000 \
|
||||
-v /tmp:/tmp \
|
||||
lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36 \
|
||||
lmsysorg/sglang:latest-cu130 \
|
||||
bash
|
||||
```
|
||||
|
||||
@ -237,7 +237,7 @@ docker ps | grep sglang | awk '{print $1}' | xargs docker stop
|
||||
docker container prune -f
|
||||
|
||||
## Remove SGLang images (optional)
|
||||
docker rmi lmsysorg/sglang@sha256:ceaf8b16e02d165143633ac228bbb994a05fe77d7e0526cf035ae4bbf4eacc36
|
||||
docker rmi lmsysorg/sglang:latest-cu130
|
||||
```
|
||||
|
||||
## Step 10. Next steps
|
||||
|
||||
@ -226,6 +226,8 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings
|
||||
const model = JSON.parse(selectedModel);
|
||||
if (model.provider === "ollama") {
|
||||
processingMethod = `Ollama ${model.model || 'qwen3:1.7b'}`;
|
||||
} else if (model.provider === "vllm") {
|
||||
processingMethod = `vLLM ${model.model || 'local model'}`;
|
||||
} else if (model.id?.startsWith("nvidia-")) {
|
||||
processingMethod = 'NVIDIA Nemotron';
|
||||
}
|
||||
@ -242,14 +244,36 @@ export function EmbeddingsGenerator({ showTripleExtraction = false }: Embeddings
|
||||
|
||||
// Call processDocuments with the selected document IDs and processing options
|
||||
const useGraphTransformer = useLangChain && langChainMethod === 'graphtransformer';
|
||||
await processDocuments(selectedDocs, {
|
||||
const processingOptions: Parameters<typeof processDocuments>[1] = {
|
||||
useLangChain,
|
||||
useGraphTransformer,
|
||||
promptConfigs: options || undefined,
|
||||
chunkSize: options?.chunkSize,
|
||||
overlapSize: options?.overlapSize,
|
||||
chunkingMethod: options?.chunkingMethod
|
||||
});
|
||||
};
|
||||
|
||||
try {
|
||||
const selectedModel = localStorage.getItem("selectedModel");
|
||||
if (selectedModel) {
|
||||
const model = JSON.parse(selectedModel);
|
||||
if (model.provider === "ollama") {
|
||||
processingOptions.llmProvider = "ollama";
|
||||
processingOptions.ollamaModel = model.model || "qwen3:1.7b";
|
||||
processingOptions.ollamaBaseUrl = model.baseURL || "http://localhost:11434/v1";
|
||||
} else if (model.provider === "vllm") {
|
||||
processingOptions.llmProvider = "vllm";
|
||||
processingOptions.vllmModel = model.model;
|
||||
processingOptions.vllmBaseUrl = model.baseURL || "http://localhost:8001/v1";
|
||||
} else if (model.provider === "nvidia" || model.id?.startsWith("nvidia-")) {
|
||||
processingOptions.llmProvider = "nvidia";
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.log("Could not parse selected model, using default extraction provider");
|
||||
}
|
||||
|
||||
await processDocuments(selectedDocs, processingOptions);
|
||||
|
||||
// Navigate to the edit tab after processing is complete
|
||||
setTimeout(() => {
|
||||
@ -1265,4 +1289,4 @@ function InfoIcon(props: React.SVGProps<SVGSVGElement>) {
|
||||
<path d="M12 8h.01" />
|
||||
</svg>
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,7 +151,9 @@ export function ModelSelector() {
|
||||
|
||||
// Default to first available local model (vLLM or Ollama)
|
||||
const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama")
|
||||
setSelectedModel(localModel || availableModels[0])
|
||||
const defaultModel = localModel || availableModels[0]
|
||||
setSelectedModel(defaultModel)
|
||||
localStorage.setItem("selectedModel", JSON.stringify(defaultModel))
|
||||
}
|
||||
|
||||
setIsLoading(false)
|
||||
|
||||
@ -49,7 +49,7 @@ export type Document = {
|
||||
}
|
||||
}
|
||||
|
||||
export type LLMProvider = 'nvidia' | 'ollama';
|
||||
export type LLMProvider = 'nvidia' | 'ollama' | 'vllm';
|
||||
|
||||
export type ProcessingOptions = {
|
||||
useLangChain?: boolean;
|
||||
@ -58,6 +58,8 @@ export type ProcessingOptions = {
|
||||
llmProvider?: LLMProvider;
|
||||
ollamaModel?: string;
|
||||
ollamaBaseUrl?: string;
|
||||
vllmModel?: string;
|
||||
vllmBaseUrl?: string;
|
||||
chunkSize?: number;
|
||||
overlapSize?: number;
|
||||
chunkingMethod?: 'optimized' | 'pyg';
|
||||
@ -451,6 +453,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
|
||||
llmProvider = 'ollama',
|
||||
ollamaModel = 'qwen3:1.7b',
|
||||
ollamaBaseUrl = 'http://localhost:11434/v1',
|
||||
vllmModel,
|
||||
vllmBaseUrl,
|
||||
chunkSize = 64000,
|
||||
overlapSize = 2000,
|
||||
chunkingMethod = 'optimized'
|
||||
@ -460,6 +464,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
|
||||
llmProvider,
|
||||
ollamaModel,
|
||||
ollamaBaseUrl,
|
||||
vllmModel,
|
||||
vllmBaseUrl,
|
||||
chunkSize,
|
||||
overlapSize,
|
||||
chunkingMethod
|
||||
@ -485,6 +491,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
|
||||
llmProvider?: LLMProvider;
|
||||
ollamaModel?: string;
|
||||
ollamaBaseUrl?: string;
|
||||
vllmModel?: string;
|
||||
vllmBaseUrl?: string;
|
||||
chunkSize?: number;
|
||||
overlapSize?: number;
|
||||
chunkingMethod?: 'optimized' | 'pyg';
|
||||
@ -673,6 +681,12 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
|
||||
if (llmOptions.ollamaBaseUrl) {
|
||||
requestBody.ollamaBaseUrl = llmOptions.ollamaBaseUrl;
|
||||
}
|
||||
if (llmOptions.vllmModel) {
|
||||
requestBody.vllmModel = llmOptions.vllmModel;
|
||||
}
|
||||
if (llmOptions.vllmBaseUrl) {
|
||||
requestBody.vllmBaseUrl = llmOptions.vllmBaseUrl;
|
||||
}
|
||||
}
|
||||
|
||||
// Add prompt configs if available
|
||||
@ -1273,4 +1287,4 @@ export function useDocuments() {
|
||||
throw new Error("useDocuments must be used within a DocumentProvider")
|
||||
}
|
||||
return context
|
||||
}
|
||||
}
|
||||
|
||||
@ -290,9 +290,6 @@ export class LangChainService {
|
||||
configuration: {
|
||||
baseURL: baseURL,
|
||||
timeout: 120000, // 2 minute timeout for vLLM inference
|
||||
},
|
||||
modelKwargs: {
|
||||
"response_format": { "type": "text" }
|
||||
}
|
||||
});
|
||||
|
||||
@ -320,4 +317,4 @@ export class LangChainService {
|
||||
}
|
||||
|
||||
// Export a singleton instance for convenience
|
||||
export const langChainService = LangChainService.getInstance();
|
||||
export const langChainService = LangChainService.getInstance();
|
||||
|
||||
@ -153,6 +153,16 @@ export class QdrantService {
|
||||
return true;
|
||||
}
|
||||
|
||||
const collectionsResponse = await fetch(`${this.hostUrl}/collections`, {
|
||||
method: 'GET'
|
||||
});
|
||||
|
||||
if (collectionsResponse.ok) {
|
||||
console.log(`Qdrant server is reachable`);
|
||||
this.isQdrantRunningCheck = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
console.log('Qdrant health check failed - server might not be running');
|
||||
this.isQdrantRunningCheck = false;
|
||||
return false;
|
||||
@ -534,6 +544,21 @@ export class QdrantService {
|
||||
public async getStats(): Promise<any> {
|
||||
try {
|
||||
console.log('Getting stats from Qdrant...');
|
||||
const isRunning = await this.isQdrantRunning();
|
||||
if (!isRunning) {
|
||||
return {
|
||||
totalVectorCount: 0,
|
||||
source: 'qdrant',
|
||||
httpHealthy: false,
|
||||
url: this.hostUrl,
|
||||
error: `Qdrant is not reachable at ${this.hostUrl}. Start vector search with ./start.sh --vector-search if you need Vector DB features.`
|
||||
};
|
||||
}
|
||||
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
}
|
||||
|
||||
const response = await this.makeRequest(`/collections/${this.collectionName}`, 'GET');
|
||||
|
||||
if (response && response.result) {
|
||||
@ -554,17 +579,19 @@ export class QdrantService {
|
||||
console.log(`Qdrant stats request failed`);
|
||||
return {
|
||||
totalVectorCount: 0,
|
||||
source: 'error',
|
||||
httpHealthy: false,
|
||||
error: 'Failed to get stats'
|
||||
source: 'qdrant',
|
||||
httpHealthy: true,
|
||||
url: this.hostUrl,
|
||||
error: `Qdrant is reachable, but collection '${this.collectionName}' is not available.`
|
||||
};
|
||||
}
|
||||
} catch (error) {
|
||||
console.log('Qdrant connection failed - server may not be running');
|
||||
return {
|
||||
totalVectorCount: 0,
|
||||
source: 'error',
|
||||
source: 'qdrant',
|
||||
httpHealthy: false,
|
||||
url: this.hostUrl,
|
||||
error: error instanceof Error ? error.message : String(error)
|
||||
};
|
||||
}
|
||||
|
||||
@ -176,6 +176,10 @@ if [ "$USE_VECTOR_SEARCH" = true ]; then
|
||||
echo " • Qdrant: http://localhost:6333"
|
||||
echo " • Sentence Transformers: http://localhost:8000"
|
||||
echo ""
|
||||
else
|
||||
echo "Vector Search Services: disabled"
|
||||
echo " • Start with --vector-search to enable Vector DB status and embedding search"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
echo "Next steps:"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user