mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-25 03:13:53 +00:00
feat(langchain): upgrade to Llama 3.3 Nemotron Super 49B
- Update LangChain service to use Llama 3.3 Nemotron Super 49B v1.5
- Adjust temperature to 0.6 for better response quality
- Increase timeout to 120s for larger model
- Add top_p, frequency_penalty, and presence_penalty parameters
- Remove deprecated response_format configuration
This commit is contained in:
parent
5be2ad78bf
commit
12c4777eae
@ -29,7 +29,7 @@ export class LangChainService {
|
|||||||
temperature?: number;
|
temperature?: number;
|
||||||
maxTokens?: number;
|
maxTokens?: number;
|
||||||
}): Promise<ChatOpenAI> {
|
}): Promise<ChatOpenAI> {
|
||||||
const modelId = "nvdev/nvidia/llama-3.1-nemotron-70b-instruct";
|
const modelId = "nvidia/llama-3.3-nemotron-super-49b-v1.5";
|
||||||
const cacheKey = `nemotron-${options?.temperature || 0.7}-${options?.maxTokens || 8192}`;
|
const cacheKey = `nemotron-${options?.temperature || 0.7}-${options?.maxTokens || 8192}`;
|
||||||
|
|
||||||
console.log(`Requesting Nemotron model (cacheKey: ${cacheKey})`);
|
console.log(`Requesting Nemotron model (cacheKey: ${cacheKey})`);
|
||||||
@ -73,15 +73,17 @@ export class LangChainService {
|
|||||||
// Create a new ChatOpenAI instance
|
// Create a new ChatOpenAI instance
|
||||||
const model = new ChatOpenAI({
|
const model = new ChatOpenAI({
|
||||||
modelName: modelId,
|
modelName: modelId,
|
||||||
temperature: options?.temperature || 0.7,
|
temperature: options?.temperature || 0.6,
|
||||||
maxTokens: options?.maxTokens || 8192,
|
maxTokens: options?.maxTokens || 8192,
|
||||||
openAIApiKey: apiKey,
|
openAIApiKey: apiKey,
|
||||||
configuration: {
|
configuration: {
|
||||||
baseURL: "https://integrate.api.nvidia.com/v1",
|
baseURL: "https://integrate.api.nvidia.com/v1",
|
||||||
timeout: 60000, // 60 second timeout
|
timeout: 120000, // 120 second timeout for larger model
|
||||||
},
|
},
|
||||||
modelKwargs: {
|
modelKwargs: {
|
||||||
"response_format": { "type": "text" }
|
top_p: 0.95,
|
||||||
|
frequency_penalty: 0,
|
||||||
|
presence_penalty: 0
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user