mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-28 12:43:52 +00:00
Compare commits
3 Commits
7ce2614547
...
1b35d6074f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b35d6074f | ||
|
|
9414a5141f | ||
|
|
911ca6db8b |
@ -47,8 +47,8 @@ All necessary files for the playbook can be found [here on GitHub](https://githu
|
|||||||
* **Duration:** 45-90 minutes for complete setup and initial model fine-tuning
|
* **Duration:** 45-90 minutes for complete setup and initial model fine-tuning
|
||||||
* **Risks:** Model downloads can be large (several GB), ARM64 package compatibility issues may require troubleshooting, distributed training setup complexity increases with multi-node configurations
|
* **Risks:** Model downloads can be large (several GB), ARM64 package compatibility issues may require troubleshooting, distributed training setup complexity increases with multi-node configurations
|
||||||
* **Rollback:** Virtual environments can be completely removed; no system-level changes are made to the host system beyond package installations.
|
* **Rollback:** Virtual environments can be completely removed; no system-level changes are made to the host system beyond package installations.
|
||||||
* **Last Updated:** 01/15/2026
|
* **Last Updated:** 03/04/2026
|
||||||
* Fix QLoRA fine-tuning workflow
|
* Recommend running NeMo fine-tuning workflow via Docker
|
||||||
|
|
||||||
## Instructions
|
## Instructions
|
||||||
|
|
||||||
|
|||||||
@ -27,8 +27,8 @@ services:
|
|||||||
# Ollama configuration
|
# Ollama configuration
|
||||||
- OLLAMA_BASE_URL=http://ollama:11434/v1
|
- OLLAMA_BASE_URL=http://ollama:11434/v1
|
||||||
- OLLAMA_MODEL=llama3.1:8b
|
- OLLAMA_MODEL=llama3.1:8b
|
||||||
# Disable vLLM
|
# vLLM disabled in default Ollama mode
|
||||||
- VLLM_BASE_URL=http://localhost:8001/v1
|
# - VLLM_BASE_URL=http://localhost:8001/v1
|
||||||
- VLLM_MODEL=disabled
|
- VLLM_MODEL=disabled
|
||||||
# Vector DB configuration
|
# Vector DB configuration
|
||||||
- QDRANT_URL=http://qdrant:6333
|
- QDRANT_URL=http://qdrant:6333
|
||||||
|
|||||||
@ -108,7 +108,7 @@ export class TextProcessor {
|
|||||||
|
|
||||||
// Determine which LLM provider to use based on configuration
|
// Determine which LLM provider to use based on configuration
|
||||||
// Priority: vLLM > NVIDIA > Ollama
|
// Priority: vLLM > NVIDIA > Ollama
|
||||||
if (process.env.VLLM_BASE_URL) {
|
if (process.env.VLLM_BASE_URL && process.env.VLLM_MODEL && process.env.VLLM_MODEL !== 'disabled') {
|
||||||
this.selectedLLMProvider = 'vllm';
|
this.selectedLLMProvider = 'vllm';
|
||||||
} else if (process.env.NVIDIA_API_KEY) {
|
} else if (process.env.NVIDIA_API_KEY) {
|
||||||
this.selectedLLMProvider = 'nvidia';
|
this.selectedLLMProvider = 'nvidia';
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user