feat(langchain): upgrade to Llama 3.3 Nemotron Super 49B

- Update LangChain service to use Llama 3.3 Nemotron Super 49B v1.5
- Adjust temperature to 0.6 for better response quality
- Increase timeout to 120s for larger model
- Add top_p, frequency_penalty, and presence_penalty parameters
- Remove deprecated response_format configuration
This commit is contained in:
Santosh Bhavani 2025-10-19 20:57:03 -07:00
parent 5be2ad78bf
commit 12c4777eae

View File

@@ -29,7 +29,7 @@ export class LangChainService {
temperature?: number;
maxTokens?: number;
}): Promise<ChatOpenAI> {
const modelId = "nvdev/nvidia/llama-3.1-nemotron-70b-instruct";
const modelId = "nvidia/llama-3.3-nemotron-super-49b-v1.5";
const cacheKey = `nemotron-${options?.temperature || 0.7}-${options?.maxTokens || 8192}`;
console.log(`Requesting Nemotron model (cacheKey: ${cacheKey})`);
@@ -73,15 +73,17 @@ export class LangChainService {
// Create a new ChatOpenAI instance
const model = new ChatOpenAI({
modelName: modelId,
temperature: options?.temperature || 0.7,
temperature: options?.temperature || 0.6,
maxTokens: options?.maxTokens || 8192,
openAIApiKey: apiKey,
configuration: {
baseURL: "https://integrate.api.nvidia.com/v1",
timeout: 60000, // 60 second timeout
timeout: 120000, // 120 second timeout for larger model
},
modelKwargs: {
"response_format": { "type": "text" }
top_p: 0.95,
frequency_penalty: 0,
presence_penalty: 0
}
});