//
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
"use client"

import { useState, useEffect } from "react"
import { ChevronDown, Cpu, Server, RefreshCw } from "lucide-react"
import { OllamaIcon } from "@/components/ui/ollama-icon"

/** One selectable model entry shown in the dropdown. */
interface LLMModel {
  /** Unique key within the list; local backends prefix it with the provider name. */
  id: string
  /** Short label shown to the user. */
  name: string
  /** Model identifier as reported by the backend (or the NVIDIA API model path). */
  model: string
  /** Backend the model is served from: "vllm", "ollama" or "nvidia". */
  provider: string
  description?: string
}

// NVIDIA API models (always available if API key is set)
const NVIDIA_MODELS: LLMModel[] = [
  {
    id: "nvidia-nemotron-super",
    name: "Nemotron Super 49B",
    model: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
    provider: "nvidia",
    description: "NVIDIA API (requires key)",
  },
  {
    id: "nvidia-nemotron-nano",
    name: "Nemotron Nano 9B v2",
    model: "nvidia/nvidia-nemotron-nano-9b-v2",
    provider: "nvidia",
    description: "NVIDIA API - Fast & efficient",
  },
]

/** localStorage key under which the chosen model is persisted as JSON. */
const STORAGE_KEY = "selectedModelForRAG"
/** Name of the window-level CustomEvent fired whenever the user picks a model. */
const MODEL_SELECTED_EVENT = "ragModelSelected"
/** Per-backend probe timeout, in milliseconds. */
const BACKEND_TIMEOUT_MS = 3000

/**
 * Pulls a model identifier out of one entry of a backend's `models` array.
 *
 * @param entry A plain string, or an object carrying the identifier in one of `keys`.
 * @param keys Object fields to try, in priority order.
 * @returns The first non-empty string found, or `undefined` when the entry has none.
 */
function readModelIdentifier(entry: unknown, keys: readonly string[]): string | undefined {
  if (typeof entry === "string") {
    return entry.length > 0 ? entry : undefined
  }
  if (typeof entry !== "object" || entry === null) {
    return undefined
  }
  const record = entry as Record<string, unknown>
  for (const key of keys) {
    const value = record[key]
    if (typeof value === "string" && value.length > 0) {
      return value
    }
  }
  return undefined
}

/**
 * Fetches `url` and returns the identifiers found in the `models` array of its JSON body.
 *
 * @returns An empty list for a non-OK response or a body without a `models` array.
 * @throws Whatever `fetch` / `response.json()` throw (network error, timeout, invalid JSON).
 */
async function fetchModelIdentifiers(url: string, keys: readonly string[]): Promise<string[]> {
  const response = await fetch(url, { signal: AbortSignal.timeout(BACKEND_TIMEOUT_MS) })
  if (!response.ok) {
    return []
  }
  const payload: unknown = await response.json()
  if (typeof payload !== "object" || payload === null) {
    return []
  }
  const entries = (payload as { models?: unknown }).models
  if (!Array.isArray(entries)) {
    return []
  }
  // Skip malformed entries individually instead of letting one bad entry discard the whole list.
  return entries.flatMap((entry: unknown) => {
    const identifier = readModelIdentifier(entry, keys)
    return identifier === undefined ? [] : [identifier]
  })
}

/** Lists models served by vLLM through the app's `/api/vllm/models` route; empty when unreachable. */
async function loadVllmModels(): Promise<LLMModel[]> {
  try {
    const identifiers = await fetchModelIdentifiers("/api/vllm/models", ["id", "name"])
    return identifiers.map((modelId) => ({
      id: `vllm-${modelId}`,
      // `||` on purpose: an identifier ending in "/" yields "" and must fall back to the full id.
      name: modelId.split("/").pop() || modelId,
      model: modelId,
      provider: "vllm",
      description: "vLLM (GPU-accelerated)",
    }))
  } catch {
    // vLLM not available
    console.log("vLLM not available")
    return []
  }
}

/** Lists models served by Ollama through the app's `/api/ollama/tags` route; empty when unreachable. */
async function loadOllamaModels(): Promise<LLMModel[]> {
  try {
    const identifiers = await fetchModelIdentifiers("/api/ollama/tags", ["name"])
    return identifiers.map((modelName) => ({
      id: `ollama-${modelName}`,
      name: modelName,
      model: modelName,
      provider: "ollama",
      description: "Local Ollama model",
    }))
  } catch {
    // Ollama not available
    console.log("Ollama not available")
    return []
  }
}

/** Returns the `id` of the model persisted in localStorage, or `undefined` if absent or unreadable. */
function readSavedModelId(): string | undefined {
  try {
    const raw = localStorage.getItem(STORAGE_KEY)
    if (!raw) {
      return undefined
    }
    const parsed: unknown = JSON.parse(raw)
    if (typeof parsed !== "object" || parsed === null) {
      return undefined
    }
    const savedId = (parsed as { id?: unknown }).id
    return typeof savedId === "string" ? savedId : undefined
  } catch {
    // Storage unavailable or stored value is not valid JSON: behave as if nothing was saved.
    return undefined
  }
}

/** Maps a provider key to its human-readable group heading; unknown keys are shown as-is. */
function getProviderLabel(provider: string): string {
  switch (provider) {
    case "ollama":
      return "Ollama"
    case "vllm":
      return "vLLM"
    case "nvidia":
      return "NVIDIA API"
    default:
      return provider
  }
}

// NOTE(review): the JSX markup in this file (elements, nesting, Tailwind class names, which
// lucide icon goes where) was rebuilt from the surviving text, handlers and imports because
// the tags were missing from the reviewed copy. Compare against the previous rendering and
// adjust the styling before merging.

/** Picks the icon for a provider. Assumes `OllamaIcon` accepts `className` — TODO confirm. */
function getModelIcon(provider: string) {
  if (provider === "ollama") {
    return <OllamaIcon className="h-3 w-3" />
  }
  if (provider === "vllm") {
    return <Server className="h-3 w-3" />
  }
  return <Cpu className="h-3 w-3" />
}

/**
 * Compact dropdown for choosing the LLM used for answer generation.
 *
 * On mount it probes the vLLM and Ollama routes, appends the static NVIDIA API models, and
 * selects (in order of preference) the model saved in localStorage, the first local model,
 * or the first model in the list. Picking a model persists it under `selectedModelForRAG`
 * and dispatches a `ragModelSelected` CustomEvent on `window` with `{ detail: { model } }`.
 */
export function LLMSelectorCompact() {
  const [models, setModels] = useState<LLMModel[]>([])
  const [selectedModel, setSelectedModel] = useState<LLMModel | null>(null)
  const [isOpen, setIsOpen] = useState(false)
  const [isLoading, setIsLoading] = useState(true)

  // Fetch available models from running backends
  const fetchAvailableModels = async () => {
    setIsLoading(true)

    // Both probes are independent, so run them concurrently; each resolves to [] on failure.
    const [vllmModels, ollamaModels] = await Promise.all([loadVllmModels(), loadOllamaModels()])

    // Order matters for the default pick: vLLM, then Ollama, then the always-present NVIDIA API models.
    const availableModels: LLMModel[] = [...vllmModels, ...ollamaModels, ...NVIDIA_MODELS]
    setModels(availableModels)

    // Try to restore the saved selection, but only if that model is still being offered.
    const savedId = readSavedModelId()
    const restored =
      savedId === undefined ? undefined : availableModels.find((m) => m.id === savedId)

    // Default to first available local model (vLLM or Ollama), not NVIDIA API
    const localModel = availableModels.find(
      (m) => m.provider === "vllm" || m.provider === "ollama",
    )

    setSelectedModel(restored ?? localModel ?? availableModels[0] ?? null)
    setIsLoading(false)
  }

  // Fetch models on mount
  useEffect(() => {
    void fetchAvailableModels()
  }, [])

  // Save selected model to localStorage and dispatch event
  const handleSelectModel = (model: LLMModel) => {
    setSelectedModel(model)
    setIsOpen(false)

    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(model))
    } catch (e: unknown) {
      // Persistence is best-effort (quota exceeded, storage disabled); listeners must still be told.
      console.warn("Could not persist selected model", e)
    }

    // Dispatch event for other components
    window.dispatchEvent(new CustomEvent(MODEL_SELECTED_EVENT, { detail: { model } }))
  }

  if (isLoading) {
    return (
      <div className="flex items-center gap-2 px-2 py-1 text-xs text-muted-foreground">
        <RefreshCw className="h-3 w-3 animate-spin" />
        <span>Loading models...</span>
      </div>
    )
  }

  if (!selectedModel) {
    return (
      <div className="px-2 py-1 text-xs text-muted-foreground">
        No models available
      </div>
    )
  }

  // Group models by provider
  const groupedModels = models.reduce<Record<string, LLMModel[]>>((acc, model) => {
    const bucket = acc[model.provider] ?? []
    bucket.push(model)
    acc[model.provider] = bucket
    return acc
  }, {})

  return (
    <div className="relative inline-block">
      <button
        type="button"
        aria-expanded={isOpen}
        onClick={() => setIsOpen((open) => !open)}
        className="flex items-center gap-1.5 rounded-md border px-2 py-1 text-xs hover:bg-muted"
      >
        {getModelIcon(selectedModel.provider)}
        <span className="max-w-[160px] truncate">{selectedModel.name}</span>
        <ChevronDown className={`h-3 w-3 transition-transform ${isOpen ? "rotate-180" : ""}`} />
      </button>

      {isOpen && (
        <>
          {/* Backdrop */}
          <div className="fixed inset-0 z-40" onClick={() => setIsOpen(false)} />

          {/* Dropdown */}
          <div className="absolute right-0 z-50 mt-1 w-64 rounded-md border bg-background shadow-lg">
            <div className="border-b px-3 py-2 text-xs font-medium text-muted-foreground">
              Select LLM for Answer Generation
            </div>
            <div className="max-h-64 overflow-y-auto py-1">
              {Object.entries(groupedModels).map(([provider, providerModels]) => (
                <div key={provider}>
                  <div className="px-3 py-1 text-[10px] font-semibold uppercase text-muted-foreground">
                    {getProviderLabel(provider)}
                  </div>
                  {providerModels.map((model) => (
                    <button
                      key={model.id}
                      type="button"
                      onClick={() => handleSelectModel(model)}
                      className={`flex w-full items-center gap-2 px-3 py-1.5 text-left text-xs hover:bg-muted ${
                        model.id === selectedModel.id ? "bg-muted" : ""
                      }`}
                    >
                      {getModelIcon(model.provider)}
                      <span className="flex min-w-0 flex-col">
                        <span className="truncate">{model.name}</span>
                        {model.description && (
                          <span className="truncate text-[10px] text-muted-foreground">
                            {model.description}
                          </span>
                        )}
                      </span>
                    </button>
                  ))}
                </div>
              ))}
            </div>
          </div>
        </>
      )}
    </div>
  )
}