// dgx-spark-playbooks/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts

//
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { ChatOpenAI } from "@langchain/openai";
import { SystemMessage } from "@langchain/core/messages";

/**
 * Service for interacting with LLMs through LangChain integrations
 */
export class LangChainService {
  private static instance: LangChainService;

  // Cache for created models to avoid recreating them
  private modelCache: Record<string, ChatOpenAI> = {};

  private constructor() {}

  /**
   * Get the singleton instance of LangChainService
   */
  public static getInstance(): LangChainService {
    if (!LangChainService.instance) {
      LangChainService.instance = new LangChainService();
    }
    return LangChainService.instance;
  }
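
  // Usage sketch (illustrative): callers go through the singleton rather than
  // constructing the service, so every call site shares one model cache:
  //
  //   const service = LangChainService.getInstance();
  //   const model = await service.getNemotronModel({ temperature: 0.2 });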

  /**
   * Get or create a ChatOpenAI model instance for the NVIDIA Nemotron model
   */
  public async getNemotronModel(options?: {
    temperature?: number;
    maxTokens?: number;
  }): Promise<ChatOpenAI> {
    const modelId = "nvidia/llama-3.3-nemotron-super-49b-v1.5";
    // Use ?? so an explicit temperature of 0 is honored, and derive the cache
    // key from the same defaults passed to model creation below.
    const temperature = options?.temperature ?? 0.6;
    const maxTokens = options?.maxTokens ?? 8192;
    const cacheKey = `nemotron-${temperature}-${maxTokens}`;
    console.log(`Requesting Nemotron model (cacheKey: ${cacheKey})`);

    if (this.modelCache[cacheKey]) {
      console.log(`Using cached model for: ${cacheKey}`);
      return this.modelCache[cacheKey];
    }

    // Try to get the API key from the server endpoint if running in a browser
    let apiKey: string | undefined;
    if (typeof window !== 'undefined') {
      try {
        console.log("Fetching API key from server endpoint");
        const response = await fetch('/api/config');
        if (!response.ok) {
          throw new Error(`Failed to fetch API key: ${response.statusText}`);
        }
        const config = await response.json();
        apiKey = config.nemotronApiKey;
        console.log(`Retrieved API key from server: ${apiKey ? "Yes" : "No"}`);
      } catch (error) {
        console.error("Error fetching API key:", error);
      }
    } else {
      // Server-side: use the environment variable directly
      apiKey = process.env.NVIDIA_API_KEY;
      console.log(`Retrieved API key from environment: ${apiKey ? "Yes" : "No"}`);
    }

    // If no API key was found, fail fast with an actionable error
    if (!apiKey) {
      console.error('No API key found for Nemotron model');
      throw new Error('No API key found for Nemotron model. Please add NVIDIA_API_KEY to your environment variables.');
    }

    console.log(`Creating new ChatOpenAI instance for: ${modelId}`);
    try {
      // Create a new ChatOpenAI instance pointed at the NVIDIA API endpoint
      const model = new ChatOpenAI({
        modelName: modelId,
        temperature,
        maxTokens,
        openAIApiKey: apiKey,
        configuration: {
          baseURL: "https://integrate.api.nvidia.com/v1",
          timeout: 120000, // 120-second timeout for the larger model
        },
        modelKwargs: {
          top_p: 0.95,
          frequency_penalty: 0,
          presence_penalty: 0,
        },
      });
      console.log(`Created ChatOpenAI instance for model: ${modelId}`);

      // Test the model with a simple request before caching
      console.log("Testing model with a simple message...");
      try {
        const testResult = await model.invoke([new SystemMessage("Hello")]);
        console.log("Model test successful, response:", testResult.content.toString().substring(0, 50) + "...");
        // Cache the model
        this.modelCache[cacheKey] = model;
        return model;
      } catch (testError) {
        console.error("Model test failed with error:", testError);
        throw new Error(`Model test failed: ${testError instanceof Error ? testError.message : String(testError)}`);
      }
    } catch (error) {
      console.error("Error creating or testing Nemotron model:", error);
      throw new Error(`Failed to initialize Nemotron model: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
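
  // Example (hypothetical call site; `HumanMessage` comes from
  // "@langchain/core/messages"):
  //
  //   const model = await langChainService.getNemotronModel({ maxTokens: 1024 });
  //   const reply = await model.invoke([new HumanMessage("Extract triples from: ...")]);
  //   console.log(reply.content);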

  /**
   * Get or create a ChatOpenAI model instance for any NVIDIA model
   */
  public async getNvidiaModel(
    modelId: string,
    options?: {
      temperature?: number;
      maxTokens?: number;
    }
  ): Promise<ChatOpenAI> {
    // Use ?? so explicit zero values are honored in the cache key and below
    const temperature = options?.temperature ?? 0.7;
    const maxTokens = options?.maxTokens ?? 8192;
    const cacheKey = `nvidia-${modelId}-${temperature}-${maxTokens}`;

    if (this.modelCache[cacheKey]) {
      return this.modelCache[cacheKey];
    }

    // Try to get the API key from the server endpoint if running in a browser
    let apiKey: string | undefined;
    if (typeof window !== 'undefined') {
      try {
        console.log("Fetching API key from server endpoint");
        const response = await fetch('/api/config');
        if (!response.ok) {
          throw new Error(`Failed to fetch API key: ${response.statusText}`);
        }
        const config = await response.json();
        // Get the appropriate API key based on the model
        apiKey = config.nvidiaApiKey;
      } catch (error) {
        console.error("Error fetching API key:", error);
      }
    } else {
      // Server-side: use the environment variable directly
      apiKey = process.env.NVIDIA_API_KEY;
    }

    // If no API key was found, fail fast
    if (!apiKey) {
      throw new Error(`No API key found for NVIDIA model: ${modelId}`);
    }

    // Create a new ChatOpenAI instance
    const model = new ChatOpenAI({
      modelName: modelId,
      temperature,
      maxTokens,
      openAIApiKey: apiKey,
      configuration: {
        baseURL: "https://integrate.api.nvidia.com/v1",
        timeout: 60000, // 60-second timeout
      },
      // Request plain-text responses from the Chat Completions API
      modelKwargs: {
        response_format: { type: "text" },
      },
    });
    console.log(`Created ChatOpenAI instance for model: ${modelId}`);

    // Test the model with a simple request before caching
    try {
      await model.invoke([new SystemMessage("Hello")]);
      console.log("Model test successful");
    } catch (error) {
      console.error("Model test failed:", error);
      throw new Error(`Failed to initialize model: ${error instanceof Error ? error.message : String(error)}`);
    }

    // Cache the model
    this.modelCache[cacheKey] = model;
    return model;
  }

  /**
   * Get or create a ChatOpenAI model instance for Ollama models
   */
  public async getOllamaModel(
    modelId: string,
    options?: {
      temperature?: number;
      maxTokens?: number;
      baseURL?: string;
    }
  ): Promise<ChatOpenAI> {
    const baseURL = options?.baseURL || process.env.OLLAMA_BASE_URL || 'http://localhost:11434/v1';
    const temperature = options?.temperature ?? 0.7;
    const maxTokens = options?.maxTokens ?? 8192;
    const cacheKey = `ollama-${modelId}-${temperature}-${maxTokens}-${baseURL}`;

    if (this.modelCache[cacheKey]) {
      return this.modelCache[cacheKey];
    }

    console.log(`Creating new ChatOpenAI instance for Ollama model: ${modelId}`);
    try {
      // Create a new ChatOpenAI instance for Ollama's OpenAI-compatible endpoint
      const model = new ChatOpenAI({
        modelName: modelId,
        temperature,
        maxTokens,
        openAIApiKey: 'ollama', // Ollama doesn't require a real API key
        configuration: {
          baseURL: baseURL,
          timeout: 1800000, // 30-minute timeout for large-model inference
          maxRetries: 0, // Disable retries to avoid additional delays
        },
        modelKwargs: {
          response_format: { type: "text" },
        },
      });
      console.log(`Created ChatOpenAI instance for Ollama model: ${modelId}`);

      // Test the model with a simple request before caching
      console.log("Testing Ollama model with a simple message...");
      try {
        const testResult = await model.invoke([new SystemMessage("Hello")]);
        console.log("Ollama model test successful, response:", testResult.content.toString().substring(0, 50) + "...");
        // Cache the model
        this.modelCache[cacheKey] = model;
        return model;
      } catch (testError) {
        console.error("Ollama model test failed with error:", testError);
        throw new Error(`Ollama model test failed: ${testError instanceof Error ? testError.message : String(testError)}`);
      }
    } catch (error) {
      console.error("Error creating or testing Ollama model:", error);
      throw new Error(`Failed to initialize Ollama model: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
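
  // Example (assumes a local Ollama server exposing its OpenAI-compatible API
  // at the default http://localhost:11434/v1; the model name "llama3" is
  // illustrative):
  //
  //   const model = await langChainService.getOllamaModel("llama3", {
  //     temperature: 0.2,
  //     baseURL: process.env.OLLAMA_BASE_URL, // optional override
  //   });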

  /**
   * Get or create a ChatOpenAI model instance for vLLM models
   */
  public async getVllmModel(
    modelId: string,
    options?: {
      temperature?: number;
      maxTokens?: number;
      baseURL?: string;
    }
  ): Promise<ChatOpenAI> {
    const baseURL = options?.baseURL || process.env.VLLM_BASE_URL || 'http://localhost:8001/v1';
    const temperature = options?.temperature ?? 0.7;
    const maxTokens = options?.maxTokens ?? 8192;
    const cacheKey = `vllm-${modelId}-${temperature}-${maxTokens}-${baseURL}`;

    if (this.modelCache[cacheKey]) {
      return this.modelCache[cacheKey];
    }

    console.log(`Creating new ChatOpenAI instance for vLLM model: ${modelId}`);
    try {
      // Create a new ChatOpenAI instance for vLLM's OpenAI-compatible endpoint
      const model = new ChatOpenAI({
        modelName: modelId,
        temperature,
        maxTokens,
        openAIApiKey: 'vllm', // vLLM doesn't require a real API key
        configuration: {
          baseURL: baseURL,
          timeout: 120000, // 2-minute timeout for vLLM inference
        },
        modelKwargs: {
          response_format: { type: "text" },
        },
      });
      console.log(`Created ChatOpenAI instance for vLLM model: ${modelId}`);

      // Test the model with a simple request before caching
      console.log("Testing vLLM model with a simple message...");
      try {
        const testResult = await model.invoke([new SystemMessage("Hello")]);
        console.log("vLLM model test successful, response:", testResult.content.toString().substring(0, 50) + "...");
        // Cache the model
        this.modelCache[cacheKey] = model;
        return model;
      } catch (testError) {
        console.error("vLLM model test failed with error:", testError);
        throw new Error(`vLLM model test failed: ${testError instanceof Error ? testError.message : String(testError)}`);
      }
    } catch (error) {
      console.error("Error creating or testing vLLM model:", error);
      throw new Error(`Failed to initialize vLLM model: ${error instanceof Error ? error.message : String(error)}`);
    }
  }
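
  // Example (assumes a vLLM server running its OpenAI-compatible API, e.g.
  // started with `vllm serve <model> --port 8001`; the model id below is
  // illustrative):
  //
  //   const model = await langChainService.getVllmModel(
  //     "meta-llama/Llama-3.1-8B-Instruct",
  //     { maxTokens: 2048 },
  //   );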
}
// Export a singleton instance for convenience
export const langChainService = LangChainService.getInstance();
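
// End-to-end sketch (illustrative; assumes NVIDIA_API_KEY is reachable either
// through the /api/config endpoint in the browser or as an environment
// variable on the server, and that the import path matches your project):
//
//   import { langChainService } from "./langchain-service";
//   import { HumanMessage } from "@langchain/core/messages";
//
//   const model = await langChainService.getNvidiaModel(
//     "meta/llama-3.1-8b-instruct", // any chat model served by integrate.api.nvidia.com
//     { temperature: 0.7, maxTokens: 2048 },
//   );
//   const result = await model.invoke([new HumanMessage("Hello!")]);
//   console.log(result.content);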