Compare commits

..

3 Commits

Author SHA1 Message Date
Ramzey Ghanaim
1b35d6074f
Merge 050f799875 into 9414a5141f 2026-04-07 11:39:24 -07:00
GitLab CI
9414a5141f chore: Regenerate all playbooks 2026-04-07 04:13:30 +00:00
GitLab CI
911ca6db8b chore: Regenerate all playbooks 2026-04-06 19:32:24 +00:00
3 changed files with 5 additions and 5 deletions

View File

@@ -47,8 +47,8 @@ All necessary files for the playbook can be found [here on GitHub](https://githu
* **Duration:** 45-90 minutes for complete setup and initial model fine-tuning * **Duration:** 45-90 minutes for complete setup and initial model fine-tuning
* **Risks:** Model downloads can be large (several GB), ARM64 package compatibility issues may require troubleshooting, distributed training setup complexity increases with multi-node configurations * **Risks:** Model downloads can be large (several GB), ARM64 package compatibility issues may require troubleshooting, distributed training setup complexity increases with multi-node configurations
* **Rollback:** Virtual environments can be completely removed; no system-level changes are made to the host system beyond package installations. * **Rollback:** Virtual environments can be completely removed; no system-level changes are made to the host system beyond package installations.
* **Last Updated:** 01/15/2026 * **Last Updated:** 03/04/2026
* Fix qLoRA fine-tuning workflow * Recommend running Nemo finetune workflow via Docker
## Instructions ## Instructions

View File

@@ -27,8 +27,8 @@ services:
# Ollama configuration # Ollama configuration
- OLLAMA_BASE_URL=http://ollama:11434/v1 - OLLAMA_BASE_URL=http://ollama:11434/v1
- OLLAMA_MODEL=llama3.1:8b - OLLAMA_MODEL=llama3.1:8b
# Disable vLLM # vLLM disabled in default Ollama mode
- VLLM_BASE_URL=http://localhost:8001/v1 # - VLLM_BASE_URL=http://localhost:8001/v1
- VLLM_MODEL=disabled - VLLM_MODEL=disabled
# Vector DB configuration # Vector DB configuration
- QDRANT_URL=http://qdrant:6333 - QDRANT_URL=http://qdrant:6333

View File

@@ -108,7 +108,7 @@ export class TextProcessor {
// Determine which LLM provider to use based on configuration // Determine which LLM provider to use based on configuration
// Priority: vLLM > NVIDIA > Ollama // Priority: vLLM > NVIDIA > Ollama
if (process.env.VLLM_BASE_URL) { if (process.env.VLLM_BASE_URL && process.env.VLLM_MODEL && process.env.VLLM_MODEL !== 'disabled') {
this.selectedLLMProvider = 'vllm'; this.selectedLLMProvider = 'vllm';
} else if (process.env.NVIDIA_API_KEY) { } else if (process.env.NVIDIA_API_KEY) {
this.selectedLLMProvider = 'nvidia'; this.selectedLLMProvider = 'nvidia';