fix(txt2kg): force ollama to use cuda_v13 library to enable GPU usage

This commit is contained in:
Santosh Bhavani 2025-12-12 10:42:01 -06:00
parent 5472c97a8c
commit 3af55d4b43
2 changed files with 2 additions and 1 deletion

View File

@@ -104,7 +104,7 @@ services:
- OLLAMA_FLASH_ATTENTION=1
- OLLAMA_KEEP_ALIVE=30m
- OLLAMA_CUDA=1
- OLLAMA_LLM_LIBRARY=cuda
- OLLAMA_LLM_LIBRARY=cuda_v13
- OLLAMA_NUM_PARALLEL=1
- OLLAMA_MAX_LOADED_MODELS=1
- OLLAMA_KV_CACHE_TYPE=q8_0

View File

@@ -75,6 +75,7 @@ services:
- OLLAMA_NUM_PARALLEL=4 # Process 4 requests in parallel - DGX Spark has unified memory
- OLLAMA_MAX_LOADED_MODELS=1 # Load only one model at a time to avoid VRAM contention
- OLLAMA_KV_CACHE_TYPE=q8_0 # Reduce KV cache VRAM usage with minimal performance impact
- OLLAMA_LLM_LIBRARY=cuda_v13 # Force usage of CUDA v13 library
networks:
- default
restart: unless-stopped