mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-22 18:13:52 +00:00
Add configurable NVIDIA model support
This commit is contained in:
parent
215ce25c05
commit
97e4be5772
@ -29,7 +29,8 @@ export async function POST(req: NextRequest) {
|
||||
ollamaModel,
|
||||
ollamaBaseUrl,
|
||||
vllmModel,
|
||||
vllmBaseUrl
|
||||
vllmBaseUrl,
|
||||
nvidiaModel
|
||||
} = body;
|
||||
|
||||
if (!text || typeof text !== 'string') {
|
||||
@ -136,7 +137,8 @@ export async function POST(req: NextRequest) {
|
||||
ollamaModel: ollamaModel,
|
||||
ollamaBaseUrl: ollamaBaseUrl,
|
||||
vllmModel: vllmModel,
|
||||
vllmBaseUrl: vllmBaseUrl
|
||||
vllmBaseUrl: vllmBaseUrl,
|
||||
nvidiaModel: nvidiaModel
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -379,7 +379,8 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
|
||||
console.log(`🦙 Using Ollama model: ${requestBody.ollamaModel}`);
|
||||
} else if (model.id === "nvidia-nemotron" || model.id === "nvidia-nemotron-nano") {
|
||||
requestBody.llmProvider = "nvidia";
|
||||
console.log(`🖥️ Using NVIDIA model: ${model.id}`);
|
||||
requestBody.nvidiaModel = model.model; // Pass the actual model name
|
||||
console.log(`🖥️ Using NVIDIA model: ${model.model}`);
|
||||
}
|
||||
} catch (e) {
|
||||
// Ignore parsing errors, will use default
|
||||
@ -498,8 +499,51 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) {
|
||||
setIsProcessing(true);
|
||||
|
||||
try {
|
||||
// Check which documents are already processed in ArangoDB
|
||||
console.log('🔍 Checking which documents are already processed in ArangoDB...');
|
||||
let alreadyProcessedDocs: Set<string> = new Set();
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/graph-db/check-document', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
documentNames: docsToProcess.map(d => d.name)
|
||||
})
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
const result = await response.json();
|
||||
if (result.processedDocuments) {
|
||||
Object.entries(result.processedDocuments).forEach(([docName, isProcessed]) => {
|
||||
if (isProcessed) {
|
||||
alreadyProcessedDocs.add(docName);
|
||||
}
|
||||
});
|
||||
console.log(`✅ Found ${alreadyProcessedDocs.size} documents already processed in ArangoDB:`, Array.from(alreadyProcessedDocs));
|
||||
}
|
||||
}
|
||||
} catch (checkError) {
|
||||
console.warn('⚠️ Could not check for already processed documents, continuing anyway:', checkError);
|
||||
}
|
||||
|
||||
// Process each document sequentially
|
||||
for (const doc of docsToProcess) {
|
||||
// Skip if document is already processed in ArangoDB
|
||||
if (alreadyProcessedDocs.has(doc.name)) {
|
||||
console.log(`⏭️ Skipping document "${doc.name}" - already processed in ArangoDB`);
|
||||
updateDocumentStatus(doc.id, "Processed", {
|
||||
triples: doc.triples || [],
|
||||
graph: doc.graph,
|
||||
error: undefined
|
||||
});
|
||||
toast({
|
||||
title: "Document Skipped",
|
||||
description: `"${doc.name}" is already stored in ArangoDB`,
|
||||
duration: 3000,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
// Update status to Processing before we begin
|
||||
updateDocumentStatus(doc.id, "Processing");
|
||||
|
||||
|
||||
@ -76,6 +76,7 @@ export class TextProcessor {
|
||||
private ollamaBaseUrl: string = 'http://localhost:11434/v1';
|
||||
private vllmModel: string = 'meta-llama/Llama-3.2-3B-Instruct';
|
||||
private vllmBaseUrl: string = 'http://localhost:8001/v1';
|
||||
private nvidiaModel: string = 'nvidia/llama-3.3-nemotron-super-49b-v1.5'; // Default NVIDIA model
|
||||
|
||||
private constructor() {
|
||||
this.sentenceTransformerUrl = process.env.SENTENCE_TRANSFORMER_URL || "http://localhost:8000";
|
||||
@ -343,6 +344,7 @@ Text: ${chunk}
|
||||
${formatInstructions}`;
|
||||
|
||||
// Call NVIDIA API directly using fetch
|
||||
console.log(`🖥️ Calling NVIDIA API with model: ${this.nvidiaModel}`);
|
||||
const response = await fetch('https://integrate.api.nvidia.com/v1/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
@ -350,7 +352,7 @@ ${formatInstructions}`;
|
||||
'Authorization': `Bearer ${apiKey}`
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: 'nvidia/llama-3.3-nemotron-super-49b-v1.5',
|
||||
model: this.nvidiaModel, // Use the configured model
|
||||
messages: [
|
||||
{
|
||||
role: 'user',
|
||||
@ -561,11 +563,15 @@ ${formatInstructions}`;
|
||||
ollamaBaseUrl?: string;
|
||||
vllmModel?: string;
|
||||
vllmBaseUrl?: string;
|
||||
nvidiaModel?: string;
|
||||
}): void {
|
||||
this.selectedLLMProvider = provider;
|
||||
if (provider === 'ollama') {
|
||||
this.ollamaModel = options?.ollamaModel || this.ollamaModel;
|
||||
this.ollamaBaseUrl = options?.ollamaBaseUrl || this.ollamaBaseUrl;
|
||||
} else if (provider === 'nvidia') {
|
||||
this.nvidiaModel = options?.nvidiaModel || this.nvidiaModel;
|
||||
console.log(`🖥️ TextProcessor: NVIDIA model set to: ${this.nvidiaModel}`);
|
||||
} else if (provider === 'vllm') {
|
||||
this.vllmModel = options?.vllmModel || this.vllmModel;
|
||||
this.vllmBaseUrl = options?.vllmBaseUrl || this.vllmBaseUrl;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user