From 12c4777eae64dbc4d54043a472ca270506be276a Mon Sep 17 00:00:00 2001
From: Santosh Bhavani
Date: Sun, 19 Oct 2025 20:57:03 -0700
Subject: [PATCH] feat(langchain): upgrade to Llama 3.3 Nemotron Super 49B

- Update LangChain service to use Llama 3.3 Nemotron Super 49B v1.5
- Adjust temperature to 0.6 for better response quality
- Increase timeout to 120s for larger model
- Add top_p, frequency_penalty, and presence_penalty parameters
- Remove deprecated response_format configuration
---
 nvidia/txt2kg/assets/frontend/lib/langchain-service.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
index e24aa85..3d71345 100644
--- a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
@@ -29,7 +29,7 @@ export class LangChainService {
     temperature?: number;
     maxTokens?: number;
   }): Promise<ChatOpenAI> {
-    const modelId = "nvdev/nvidia/llama-3.1-nemotron-70b-instruct";
+    const modelId = "nvidia/llama-3.3-nemotron-super-49b-v1.5";
     const cacheKey = `nemotron-${options?.temperature || 0.7}-${options?.maxTokens || 8192}`;
 
     console.log(`Requesting Nemotron model (cacheKey: ${cacheKey})`);
@@ -73,15 +73,17 @@ export class LangChainService {
     // Create a new ChatOpenAI instance
     const model = new ChatOpenAI({
      modelName: modelId,
-      temperature: options?.temperature || 0.7,
+      temperature: options?.temperature || 0.6,
       maxTokens: options?.maxTokens || 8192,
       openAIApiKey: apiKey,
       configuration: {
         baseURL: "https://integrate.api.nvidia.com/v1",
-        timeout: 60000, // 60 second timeout
+        timeout: 120000, // 120 second timeout for larger model
       },
       modelKwargs: {
-        "response_format": { "type": "text" }
+        top_p: 0.95,
+        frequency_penalty: 0,
+        presence_penalty: 0
       }
     });