From 12c4777eae64dbc4d54043a472ca270506be276a Mon Sep 17 00:00:00 2001
From: Santosh Bhavani
Date: Sun, 19 Oct 2025 20:57:03 -0700
Subject: [PATCH] feat(langchain): upgrade to Llama 3.3 Nemotron Super 49B

- Update LangChain service to use Llama 3.3 Nemotron Super 49B v1.5
- Adjust temperature to 0.6 for better response quality
- Increase timeout to 120s for larger model
- Add top_p, frequency_penalty, and presence_penalty parameters
- Remove deprecated response_format configuration
---
 nvidia/txt2kg/assets/frontend/lib/langchain-service.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
index e24aa85..3d71345 100644
--- a/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/langchain-service.ts
@@ -29,7 +29,7 @@ export class LangChainService {
     temperature?: number;
     maxTokens?: number;
   }): Promise<ChatOpenAI> {
-    const modelId = "nvdev/nvidia/llama-3.1-nemotron-70b-instruct";
+    const modelId = "nvidia/llama-3.3-nemotron-super-49b-v1.5";
     const cacheKey = `nemotron-${options?.temperature || 0.7}-${options?.maxTokens || 8192}`;
 
     console.log(`Requesting Nemotron model (cacheKey: ${cacheKey})`);
@@ -73,15 +73,17 @@ export class LangChainService {
     // Create a new ChatOpenAI instance
     const model = new ChatOpenAI({
      modelName: modelId,
-      temperature: options?.temperature || 0.7,
+      temperature: options?.temperature || 0.6,
       maxTokens: options?.maxTokens || 8192,
       openAIApiKey: apiKey,
       configuration: {
         baseURL: "https://integrate.api.nvidia.com/v1",
-        timeout: 60000, // 60 second timeout
+        timeout: 120000, // 120 second timeout for larger model
       },
       modelKwargs: {
-        "response_format": { "type": "text" }
+        top_p: 0.95,
+        frequency_penalty: 0,
+        presence_penalty: 0
       }
     });