From d0dbd18840017ba20bec661215a6667b3964e833 Mon Sep 17 00:00:00 2001
From: GitLab CI
Date: Wed, 14 Jan 2026 16:05:35 +0000
Subject: [PATCH] chore: Regenerate all playbooks

---
 README.md                                     |   2 +-
 nvidia/llama-factory/README.md                |  42 +-
 nvidia/sglang/README.md                       |   4 +-
 nvidia/txt2kg/README.md                       |  10 +-
 nvidia/txt2kg/assets/README.md                |  57 ++-
 nvidia/txt2kg/assets/deploy/README.md         |  78 ++-
 nvidia/txt2kg/assets/deploy/app/Dockerfile    |   5 -
 .../txt2kg/assets/deploy/app/pinecone-init.sh |  16 -
 .../compose/docker-compose.complete.yml       |   2 +-
 .../deploy/compose/docker-compose.vllm.yml    | 213 +++++----
 .../assets/deploy/compose/docker-compose.yml  | 116 +++--
 .../assets/deploy/services/vllm/Dockerfile    |   4 +-
 .../deploy/services/vllm/launch_server.sh     | 137 +++---
 nvidia/txt2kg/assets/frontend/README.md       |   6 +-
 .../assets/frontend/app/api/backend/route.ts  |   2 +-
 .../frontend/app/api/embeddings/route.ts      |  22 +-
 .../frontend/app/api/extract-triples/route.ts |   4 +-
 .../assets/frontend/app/api/graph-db/route.ts |   5 +
 .../assets/frontend/app/api/metrics/route.ts  |  10 +-
 .../assets/frontend/app/api/ollama/route.ts   |   2 +-
 .../frontend/app/api/ollama/tags/route.ts     |  32 ++
 .../app/api/pinecone-diag/clear/route.ts      |  20 +-
 .../api/pinecone-diag/create-index/route.ts   |  20 +-
 .../app/api/pinecone-diag/stats/route.ts      |  20 +-
 .../frontend/app/api/rag-query/route.ts       |   2 +-
 .../app/api/sentence-embeddings/route.ts      |   8 +-
 .../assets/frontend/app/api/settings/route.ts |  50 +-
 .../frontend/app/api/vector-db/clear/route.ts |  44 ++
 .../api/vector-db/create-collection/route.ts  |  53 +++
 .../frontend/app/api/vector-db/stats/route.ts |  59 +++
 .../frontend/app/api/vllm/models/route.ts     |  40 ++
 .../assets/frontend/app/api/vllm/route.ts     |   2 +-
 nvidia/txt2kg/assets/frontend/app/globals.css |  85 ++++
 nvidia/txt2kg/assets/frontend/app/page.tsx    |  13 +-
 .../txt2kg/assets/frontend/app/rag/page.tsx   |   4 +-
 .../frontend/components/advanced-options.tsx  |  53 ++-
 .../components/database-connection.tsx        |  37 +-
 .../frontend/components/documents-table.tsx   |  85 +++-
 .../frontend/components/graph-actions.tsx     |  88 ++--
 .../components/llm-selector-compact.tsx       | 260 +++++++---
 .../frontend/components/model-selector.tsx    | 308 +++++++-----
 .../components/pinecone-connection.tsx        |  52 +-
 .../frontend/components/qdrant-connection.tsx | 207 ++++++++
 .../assets/frontend/components/rag-query.tsx  |  11 +-
 .../frontend/components/settings-modal.tsx    |  75 ++-
 .../frontend/components/theme-toggle.tsx      |  10 +-
 .../frontend/components/triple-editor.tsx     |   7 +-
 .../frontend/components/triple-viewer.tsx     | 130 ++++-
 .../frontend/components/ui/progress.tsx       |  14 +-
 .../frontend/components/ui/skeleton.tsx       |  16 +-
 .../assets/frontend/components/ui/switch.tsx  |   4 +-
 .../assets/frontend/components/ui/tabs.tsx    |   2 +-
 .../assets/frontend/components/ui/toast.tsx   |   2 +
 .../frontend/contexts/document-context.tsx    |   5 +
 nvidia/txt2kg/assets/frontend/lib/arangodb.ts | 447 +++++++++---------
 .../assets/frontend/lib/backend-service.ts    |  51 +-
 .../txt2kg/assets/frontend/lib/client-init.ts |  31 +-
 .../assets/frontend/lib/graph-db-service.ts   |  46 +-
 .../assets/frontend/lib/graph-db-util.ts      |  37 +-
 nvidia/txt2kg/assets/frontend/lib/pinecone.ts |  16 -
 nvidia/txt2kg/assets/frontend/lib/qdrant.ts   |   3 +-
 .../assets/frontend/lib/remote-backend.ts     |  41 +-
 .../assets/frontend/lib/text-processor.ts     |   2 +-
 nvidia/txt2kg/assets/frontend/package.json    |   5 +-
 .../styles/nvidia-build-typography.css        |  20 +
 .../assets/scripts/gnn/preprocess_data.py     | 141 ++----
 .../txt2kg/assets/scripts/setup-pinecone.js   |  16 -
 nvidia/txt2kg/assets/start.sh                 |  89 ++--
 nvidia/txt2kg/assets/stop.sh                  |  85 ++--
 nvidia/vllm/README.md                         |   9 +-
 70 files changed, 2341 insertions(+), 1253 deletions(-)
 create mode 100644 nvidia/txt2kg/assets/frontend/app/api/ollama/tags/route.ts
 create mode 100644 nvidia/txt2kg/assets/frontend/app/api/vector-db/clear/route.ts
 create mode 100644 nvidia/txt2kg/assets/frontend/app/api/vector-db/create-collection/route.ts
 create mode 100644 nvidia/txt2kg/assets/frontend/app/api/vector-db/stats/route.ts
 create mode 100644 nvidia/txt2kg/assets/frontend/app/api/vllm/models/route.ts
 create mode 100644 nvidia/txt2kg/assets/frontend/components/qdrant-connection.tsx

diff --git a/README.md b/README.md
index 35eac98..0452d66 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ Each playbook includes prerequisites, step-by-step instructions, troubleshooting
 - [Portfolio Optimization](nvidia/portfolio-optimization/)
 - [Fine-tune with Pytorch](nvidia/pytorch-fine-tune/)
 - [RAG Application in AI Workbench](nvidia/rag-ai-workbench/)
-- [SGLang Inference Server](nvidia/sglang/)
+- [SGLang for Inference](nvidia/sglang/)
 - [Single-cell RNA Sequencing](nvidia/single-cell/)
 - [Speculative Decoding](nvidia/speculative-decoding/)
 - [Set up Tailscale on Your Spark](nvidia/tailscale/)
diff --git a/nvidia/llama-factory/README.md b/nvidia/llama-factory/README.md
index 2107355..4eff817 100644
--- a/nvidia/llama-factory/README.md
+++ b/nvidia/llama-factory/README.md
@@ -67,8 +67,8 @@ model adaptation for specialized domains while leveraging hardware-specific opti
 * **Duration:** 30-60 minutes for initial setup, 1-7 hours for training depending on model size and dataset.
 * **Risks:** Model downloads require significant bandwidth and storage. Training may consume substantial GPU memory and require parameter tuning for hardware constraints.
 * **Rollback:** Remove Docker containers and cloned repositories. Training checkpoints are saved locally and can be deleted to reclaim storage space.
-* **Last Updated:** 12/15/2025
-  * Upgrade to latest pytorch container version nvcr.io/nvidia/pytorch:25.11-py3
+* **Last Updated:** 01/08/2026
+  * Update to Qwen3 LoRA fine-tuning workflow based on LLaMA Factory updates

 ## Instructions
@@ -105,10 +105,15 @@ cd LLaMA-Factory

 ### Step 4. Install LLaMA Factory with dependencies

-Install the package in editable mode with metrics support for training evaluation.
+Remove the torchaudio dependency (not needed for LLM fine-tuning) to avoid conflicts with the container's optimized PyTorch, then install.

 ```bash
+## Remove torchaudio dependency that conflicts with NVIDIA's PyTorch build
+sed -i 's/"torchaudio[^"]*",\?//' pyproject.toml
+
+## Install LLaMA Factory with metrics support
 pip install -e ".[metrics]"
+pip install --no-deps torchaudio
 ```

 ## Step 5. Verify Pytorch CUDA support.
@@ -126,7 +131,7 @@ python -c "import torch; print(f'PyTorch: {torch.__version__}, CUDA: {torch.cuda

 Examine the provided LoRA fine-tuning configuration for Llama-3.

 ```bash
-cat examples/train_lora/llama3_lora_sft.yaml
+cat examples/train_lora/qwen3_lora_sft.yaml
 ```

 ## Step 7. Launch fine-tuning training
@@ -137,20 +142,20 @@ cat examples/train_lora/llama3_lora_sft.yaml

 Execute the training process using the pre-configured LoRA setup.

 ```bash
-huggingface-cli login  # if the model is gated
-llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
+hf auth login  # if the model is gated
+llamafactory-cli train examples/train_lora/qwen3_lora_sft.yaml
 ```

 Example output:

-```bash
+```
 ***** train metrics *****
   epoch                    =        3.0
-  total_flos               = 22851591GF
-  train_loss               =     0.9113
-  train_runtime            = 0:22:21.99
-  train_samples_per_second =      2.437
-  train_steps_per_second   =      0.306
-Figure saved at: saves/llama3-8b/lora/sft/training_loss.png
+  total_flos               = 11076559GF
+  train_loss               =     0.9993
+  train_runtime            = 0:14:32.12
+  train_samples_per_second =      3.749
+  train_steps_per_second   =      0.471
+Figure saved at: saves/qwen3-4b/lora/sft/training_loss.png
 ```

 ## Step 8. Validate training completion

 Verify that training completed successfully and checkpoints were saved.
 ```bash
-ls -la saves/llama3-8b/lora/sft/
+ls -la saves/qwen3-4b/lora/sft/
 ```
-
 Expected output should show:
-- Final checkpoint directory (`checkpoint-21` or similar)
-- Model configuration files (`config.json`, `adapter_config.json`)
+- Final checkpoint directory (`checkpoint-411` or similar)
+- Model configuration files (`adapter_config.json`)
 - Training metrics showing decreasing loss values
 - Training loss plot saved as PNG file

@@ -173,14 +177,14 @@ Expected output should show:

 Test your fine-tuned model with custom prompts:

 ```bash
-llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/qwen3_lora_sft.yaml
 ## Type: "Hello, how can you help me today?"
 ## Expect: Response showing fine-tuned behavior
 ```

 ## Step 10. For production deployment, export your model

 ```bash
-llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/qwen3_lora_sft.yaml
 ```

 ## Step 11. Cleanup and rollback
diff --git a/nvidia/sglang/README.md b/nvidia/sglang/README.md
index 7157962..de0bf87 100644
--- a/nvidia/sglang/README.md
+++ b/nvidia/sglang/README.md
@@ -1,4 +1,4 @@
-# SGLang Inference Server
+# SGLang for Inference

 > Install and use SGLang on DGX Spark

@@ -68,6 +68,8 @@ The following models are supported with SGLang on Spark. All listed models are a
 | **Phi-4-reasoning-plus** | FP8 | ✅ | `nvidia/Phi-4-reasoning-plus-FP8` |
 | **Phi-4-reasoning-plus** | NVFP4 | ✅ | `nvidia/Phi-4-reasoning-plus-FP4` |

+Note: for NVFP4 models, add the `--quantization modelopt_fp4` flag.
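The `sed` expression added in Step 4 of the LLaMA Factory instructions above can be sanity-checked outside the container. A minimal Python sketch of the same substitution — the sample dependency string is hypothetical, and Python's `re` writes the optional trailing comma as `,?` where GNU sed needs `,\?`:

```python
import re

# Same pattern as the sed command in Step 4, in Python regex syntax.
PATTERN = r'"torchaudio[^"]*",?'

# Hypothetical dependencies line mimicking LLaMA Factory's pyproject.toml
deps = '"torch>=2.0.0", "torchaudio>=2.0.0", "transformers>=4.41.2"'

# Drops the quoted torchaudio entry (and its trailing comma, if present)
cleaned = re.sub(PATTERN, "", deps)
print(cleaned)  # torchaudio entry is gone; torch and transformers remain
```

The stray whitespace left behind is harmless to TOML parsing, which is why the playbook's one-liner does not bother cleaning it up.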
+
 ### Time & risk

 * **Estimated time:** 30 minutes for initial setup and validation
diff --git a/nvidia/txt2kg/README.md b/nvidia/txt2kg/README.md
index 320e58e..b8f8edd 100644
--- a/nvidia/txt2kg/README.md
+++ b/nvidia/txt2kg/README.md
@@ -54,9 +54,13 @@ The setup includes:
 - Document processing time scales with document size and complexity
 - **Rollback**: Stop and remove Docker containers, delete downloaded models if needed
-- **Last Updated**: 12/02/2025
-  - Knowledge graph search with multi-hop graph traversal
-  - Improved UI/UX
+- **Last Updated**: 01/08/2026
+  - Migrated from Pinecone to Qdrant for ARM64 compatibility
+  - Added vLLM support with Neo4j
+  - Added Palette UI components with accessibility improvements
+  - Added CPU-only mode for development (`./start.sh --cpu`)
+  - Optimized ArangoDB with deterministic keys and BM25 search
+  - Added GNN preprocessing scripts for knowledge graph training

 ## Instructions
diff --git a/nvidia/txt2kg/assets/README.md b/nvidia/txt2kg/assets/README.md
index c21f185..7ea5296 100644
--- a/nvidia/txt2kg/assets/README.md
+++ b/nvidia/txt2kg/assets/README.md
@@ -19,7 +19,7 @@ This playbook serves as a reference solution for knowledge graph extraction and

-By default, this playbook leverages **Ollama** for local LLM inference, providing a fully self-contained solution that runs entirely on your own hardware. You can optionally use NVIDIA-hosted models available in the [NVIDIA API Catalog](https://build.nvidia.com) for advanced capabilities.
+By default, this playbook leverages **Ollama** for local LLM inference, providing a fully self-contained solution that runs entirely on your own hardware. You can optionally use **vLLM** for GPU-accelerated inference on DGX Spark/GB300, or NVIDIA-hosted models available in the [NVIDIA API Catalog](https://build.nvidia.com) for advanced capabilities.
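The compose files later in this patch mark whichever backend is not in use with a `disabled` sentinel (`VLLM_MODEL=disabled` or `OLLAMA_MODEL=disabled`). A speculative sketch of how a client could pick the OpenAI-compatible endpoint from those variables — the function name, fallback URLs, and selection logic are illustrative, not the frontend's actual code:

```python
# Illustrative only: choose an OpenAI-compatible backend from the env vars
# that the txt2kg compose files set (VLLM_* and OLLAMA_*), treating the
# value "disabled" as "this backend is not part of the running stack".
def pick_backend(env: dict) -> tuple[str, str]:
    vllm_model = env.get("VLLM_MODEL", "disabled")
    if vllm_model != "disabled":
        # vLLM stack (--vllm): vLLM serves the model
        return env.get("VLLM_BASE_URL", "http://localhost:8001/v1"), vllm_model
    # Default stack: fall back to Ollama
    return (env.get("OLLAMA_BASE_URL", "http://localhost:11434/v1"),
            env.get("OLLAMA_MODEL", "llama3.1:8b"))

print(pick_backend({"VLLM_MODEL": "disabled", "OLLAMA_MODEL": "llama3.1:8b"}))
```

Using a sentinel value instead of unsetting the variable keeps the container environment identical in shape across both stacks, which simplifies the compose files.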
 ## Key Features

@@ -33,7 +33,7 @@ By default, this playbook leverages **Ollama** for local LLM inference, providin
 - GPU-accelerated LLM inference with Ollama
 - Fully containerized deployment with Docker Compose
 - Optional NVIDIA API integration for cloud-based models
-- Optional vector search and advanced inference capabilities
+- Optional vector search with Qdrant for semantic similarity
 - Optional graph-based RAG for contextual answers

 ## Software Components
@@ -55,9 +55,13 @@ By default, this playbook leverages **Ollama** for local LLM inference, providin

 ### Optional Components

-* **Vector Database & Embedding** (with `--complete` flag)
+* **vLLM Stack** (with `--vllm` flag)
+  * **vLLM**: GPU-accelerated LLM inference optimized for DGX Spark/GB300
+    * Default model: `nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8`
+  * **Neo4j**: Alternative graph database
+* **Vector Database & Embedding** (with `--vector-search` flag)
   * **SentenceTransformer**: Local embedding generation (model: `all-MiniLM-L6-v2`)
-  * **Pinecone**: Self-hosted vector storage and similarity search
+  * **Qdrant**: Self-hosted vector storage and similarity search
 * **Cloud Models** (configure separately)
   * **NVIDIA API**: Cloud-based models via NVIDIA API Catalog

@@ -76,7 +80,7 @@ The core workflow for knowledge graph building and visualization:

 ### Future Enhancements

 Additional capabilities can be added:
-- **Vector search**: Add semantic similarity search with local Pinecone and SentenceTransformer embeddings
+- **Vector search**: Add semantic similarity search with Qdrant and SentenceTransformer embeddings
 - **S3 storage**: MinIO for scalable document storage
 - **GNN-based GraphRAG**: Graph Neural Networks for enhanced retrieval

@@ -84,7 +88,7 @@ Additional capabilities can be added:

 This playbook includes **GPU-accelerated LLM inference** with Ollama:

-### Ollama Features
+### Ollama Features (Default)
 - **Fully local inference**: No cloud dependencies or API keys required
 - **GPU acceleration**: Automatic CUDA support with NVIDIA GPUs
 - **Multiple model support**: Use any Ollama-compatible model
@@ -92,7 +96,13 @@ This playbook includes **GPU-accelerated LLM inference** with Ollama:
 - **Easy model management**: Pull and switch models with simple commands
 - **Privacy-first**: All data processing happens on your hardware

-### Default Configuration
+### vLLM Alternative (via `--vllm` flag)
+- **High-performance inference**: Optimized for DGX Spark/GB300 unified memory
+- **FP8 quantization**: Efficient memory usage with minimal quality loss
+- **Large context support**: Up to 32K tokens context length
+- **Continuous batching**: High throughput for multiple requests
+
+### Default Ollama Configuration
 - Model: `llama3.1:8b`
 - GPU memory fraction: 0.9 (90% of available VRAM)
 - Flash attention enabled
@@ -152,8 +162,39 @@ docker exec ollama-compose ollama pull llama3.1:8b
 - **ArangoDB**: http://localhost:8529 (no authentication required)
 - **Ollama API**: http://localhost:11434

+### Alternative: Using vLLM (for DGX Spark/GB300)
+
+For GPU-accelerated inference with vLLM:
+
+```bash
+./start.sh --vllm
+```
+
+Then wait for vLLM to load the model:
+```bash
+docker logs vllm-service -f
+```
+
+Services:
+- **Web UI**: http://localhost:3001
+- **Neo4j Browser**: http://localhost:7474 (user: `neo4j`, password: `password123`)
+- **vLLM API**: http://localhost:8001
+
+### Adding Vector Search
+
+Enable semantic similarity search:
+```bash
+./start.sh --vector-search
+```
+
+This adds:
+- **Qdrant**: http://localhost:6333
+- **Sentence Transformers**: http://localhost:8000
+
 ## Available Customizations

+- **Switch LLM backend**: Use `--vllm` flag for vLLM or default for Ollama
+- **Add vector search**: Use `--vector-search` flag for Qdrant + embeddings
 - **Switch Ollama models**: Use any model from Ollama's library (Llama, Mistral, Qwen, etc.)
 - **Modify extraction prompts**: Customize how triples are extracted from text
 - **Add domain-specific knowledge sources**: Integrate external ontologies or taxonomies
@@ -163,4 +204,4 @@ docker exec ollama-compose ollama pull llama3.1:8b

 [MIT](LICENSE)

-This project will download and install additional third-party open source software projects and containers.
\ No newline at end of file
+This project will download and install additional third-party open source software projects and containers.
diff --git a/nvidia/txt2kg/assets/deploy/README.md b/nvidia/txt2kg/assets/deploy/README.md
index da33f26..ad19764 100644
--- a/nvidia/txt2kg/assets/deploy/README.md
+++ b/nvidia/txt2kg/assets/deploy/README.md
@@ -4,32 +4,36 @@ This directory contains all deployment-related configuration for the txt2kg proj

 ## Structure

-- **compose/**: Docker Compose files for local development and testing
-  - `docker-compose.yml`: Minimal Docker Compose configuration (Ollama + ArangoDB + Next.js)
-  - `docker-compose.complete.yml`: Complete stack with optional services (vLLM, Pinecone, Sentence Transformers)
-  - `docker-compose.optional.yml`: Additional optional services
-  - `docker-compose.vllm.yml`: Legacy vLLM configuration (use `--complete` flag instead)
+- **compose/**: Docker Compose configuration
+  - `docker-compose.yml`: ArangoDB + Ollama (default)
+  - `docker-compose.vllm.yml`: Neo4j + vLLM (GPU-accelerated)
 - **app/**: Frontend application Docker configuration
   - Dockerfile for Next.js application
 - **services/**: Containerized services
-  - **ollama/**: Ollama LLM inference service with GPU support
-  - **sentence-transformers/**: Sentence transformer service for embeddings (optional)
-  - **vllm/**: vLLM inference service with FP8 quantization (optional)
-  - **gpu-viz/**: GPU-accelerated graph visualization services (optional, run separately)
-  - **gnn_model/**: Graph Neural Network model service (experimental, not in default compose files)
+  - **ollama/**: Ollama LLM inference service (default)
+  - **vllm/**: vLLM inference service with GPU support (via `--vllm` flag)
+  - **sentence-transformers/**: Sentence transformer service for embeddings (via `--vector-search` flag)
+  - **gpu-viz/**: GPU-accelerated graph visualization services (run separately)
+  - **gnn_model/**: Graph Neural Network model service (experimental)

 ## Usage

 **Recommended: Use the start script**

 ```bash
-# Minimal setup (Ollama + ArangoDB + Next.js frontend)
+# Default: ArangoDB + Ollama
 ./start.sh

-# Complete stack (includes vLLM, Pinecone, Sentence Transformers)
-./start.sh --complete
+# Use Neo4j + vLLM (GPU-accelerated, for DGX Spark/GB300)
+./start.sh --vllm
+
+# Enable vector search (Qdrant + Sentence Transformers)
+./start.sh --vector-search
+
+# Combine options
+./start.sh --vllm --vector-search

 # Development mode (run frontend without Docker)
 ./start.sh --dev-frontend
@@ -37,31 +41,55 @@ This directory contains all deployment-related configuration for the txt2kg proj

 **Manual Docker Compose commands:**

-To start the minimal services:
-
 ```bash
+# Default: ArangoDB + Ollama
 docker compose -f deploy/compose/docker-compose.yml up -d
-```

-To start the complete stack:
+# Neo4j + vLLM
+docker compose -f deploy/compose/docker-compose.vllm.yml up -d

-```bash
-docker compose -f deploy/compose/docker-compose.complete.yml up -d
+# With vector search services (add --profile vector-search)
+docker compose -f deploy/compose/docker-compose.yml --profile vector-search up -d
+docker compose -f deploy/compose/docker-compose.vllm.yml --profile vector-search up -d
 ```

 ## Services Included

-### Minimal Stack (default)
+### Default Stack (ArangoDB + Ollama)
 - **Next.js App**: Web UI on port 3001
 - **ArangoDB**: Graph database on port 8529
 - **Ollama**: Local LLM inference on port 11434

-### Complete Stack (`--complete` flag)
-All minimal services plus:
-- **vLLM**: Advanced LLM inference on port 8001
-- **Pinecone (Local)**: Vector embeddings on port 5081
+### vLLM Stack (`--vllm` flag) - Neo4j + vLLM
+- **Next.js App**: Web UI on port 3001
+- **Neo4j**: Graph database on ports 7474 (HTTP) and 7687 (Bolt)
+- **vLLM**: GPU-accelerated LLM inference on port 8001
+
+### Vector Search (`--vector-search` profile)
+- **Qdrant**: Vector database on port 6333
 - **Sentence Transformers**: Embedding generation on port 8000

 ### Optional Services (run separately)
 - **GPU-Viz Services**: See `services/gpu-viz/README.md` for GPU-accelerated visualization
-- **GNN Model Service**: See `services/gnn_model/README.md` for experimental GNN-based RAG
\ No newline at end of file
+- **GNN Model Service**: See `services/gnn_model/README.md` for experimental GNN-based RAG
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│ Default Stack (./start.sh)           │ vLLM Stack (--vllm)      │
+├──────────────────────────────────────┼──────────────────────────┤
+│                                      │                          │
+│  ┌─────────────┐                     │  ┌─────────────┐         │
+│  │   Next.js   │ port 3001           │  │   Next.js   │ 3001    │
+│  └──────┬──────┘                     │  └──────┬──────┘         │
+│         │                            │         │                │
+│  ┌──────┴──────┐  ┌─────────────┐    │  ┌──────┴──────┐ ┌─────┐ │
+│  │  ArangoDB   │  │   Ollama    │    │  │    Neo4j    │ │vLLM │ │
+│  │  port 8529  │  │ port 11434  │    │  │  port 7474  │ │8001 │ │
+│  └─────────────┘  └─────────────┘    │  └─────────────┘ └─────┘ │
+│                                      │                          │
+└──────────────────────────────────────┴──────────────────────────┘
+
+Optional (--vector-search): Qdrant (6333) + Sentence Transformers (8000)
+```
diff --git a/nvidia/txt2kg/assets/deploy/app/Dockerfile b/nvidia/txt2kg/assets/deploy/app/Dockerfile
index ba47645..7305e36 100644
--- a/nvidia/txt2kg/assets/deploy/app/Dockerfile
+++ b/nvidia/txt2kg/assets/deploy/app/Dockerfile
@@ -8,10 +8,6 @@ RUN npm install -g pnpm --force --yes

 # Copy dependency files
 COPY ./frontend/package.json ./frontend/pnpm-lock.yaml* ./
-COPY ./scripts/ /scripts/
-
-# Update the setup-pinecone.js path
-RUN sed -i 's|"setup-pinecone": "node ../scripts/setup-pinecone.js"|"setup-pinecone": "node /scripts/setup-pinecone.js"|g' package.json

 # Install dependencies with cache mount for faster rebuilds
 RUN --mount=type=cache,target=/root/.local/share/pnpm/store \
@@ -32,7 +28,6 @@ RUN npm install -g pnpm --force --yes
 # Copy node_modules from deps stage
 COPY --from=deps /app/node_modules ./node_modules
 COPY --from=deps /app/package.json ./package.json
-COPY --from=deps /scripts /scripts

 # Copy source code
 COPY ./frontend/ ./
diff --git a/nvidia/txt2kg/assets/deploy/app/pinecone-init.sh b/nvidia/txt2kg/assets/deploy/app/pinecone-init.sh
index aea4f33..86eb49c 100755
--- a/nvidia/txt2kg/assets/deploy/app/pinecone-init.sh
+++ b/nvidia/txt2kg/assets/deploy/app/pinecone-init.sh
@@ -1,20 +1,4 @@
 #!/bin/sh
-#
-# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
 # Script to initialize Pinecone index at container startup

 echo "Initializing Pinecone index..."
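The flag-to-services mapping documented in the deploy README above can be summarized in a few lines. A toy Python sketch — the helper name and dict shape are mine, not part of the playbook:

```python
# Hypothetical helper summarizing the deploy README above: which services
# (and host ports) each start.sh flag combination brings up.
def expected_services(vllm: bool = False, vector_search: bool = False) -> dict:
    if vllm:
        # vLLM stack (--vllm): Neo4j + vLLM
        services = {"app": 3001, "neo4j": 7474, "vllm": 8001}
    else:
        # Default stack: ArangoDB + Ollama
        services = {"app": 3001, "arangodb": 8529, "ollama": 11434}
    if vector_search:
        # --vector-search profile adds Qdrant + Sentence Transformers
        services.update({"qdrant": 6333, "sentence-transformers": 8000})
    return services

print(expected_services())  # {'app': 3001, 'arangodb': 8529, 'ollama': 11434}
print(expected_services(vllm=True, vector_search=True))
```

Note that the two stacks are alternatives, not additive: `--vllm` swaps both the graph database (ArangoDB → Neo4j) and the LLM backend (Ollama → vLLM), while `--vector-search` layers the same two services onto either stack.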
diff --git a/nvidia/txt2kg/assets/deploy/compose/docker-compose.complete.yml b/nvidia/txt2kg/assets/deploy/compose/docker-compose.complete.yml
index 3b57d4f..b6e464e 100644
--- a/nvidia/txt2kg/assets/deploy/compose/docker-compose.complete.yml
+++ b/nvidia/txt2kg/assets/deploy/compose/docker-compose.complete.yml
@@ -104,7 +104,7 @@ services:
       - OLLAMA_FLASH_ATTENTION=1
       - OLLAMA_KEEP_ALIVE=30m
       - OLLAMA_CUDA=1
-      - OLLAMA_LLM_LIBRARY=cuda
+      - OLLAMA_LLM_LIBRARY=cuda_v13
       - OLLAMA_NUM_PARALLEL=1
       - OLLAMA_MAX_LOADED_MODELS=1
       - OLLAMA_KV_CACHE_TYPE=q8_0
diff --git a/nvidia/txt2kg/assets/deploy/compose/docker-compose.vllm.yml b/nvidia/txt2kg/assets/deploy/compose/docker-compose.vllm.yml
index f49e1ef..0d6bf73 100644
--- a/nvidia/txt2kg/assets/deploy/compose/docker-compose.vllm.yml
+++ b/nvidia/txt2kg/assets/deploy/compose/docker-compose.vllm.yml
@@ -1,6 +1,10 @@
-# This is a legacy file - use --with-optional flag instead
-# The vLLM service is now included in docker-compose.optional.yml
-# This file is kept for backwards compatibility
+# txt2kg Docker Compose - Neo4j + vLLM (GPU-accelerated)
+#
+# Optional stack optimized for DGX Spark/GB300 with unified memory support
+#
+# Usage:
+#   ./start.sh --vllm                  # Use this compose file
+#   ./start.sh --vllm --vector-search  # Add Qdrant + Sentence Transformers

 services:
   app:
@@ -10,105 +14,100 @@ services:
     ports:
       - '3001:3000'
     environment:
-      - ARANGODB_URL=http://arangodb:8529
+      # Neo4j configuration
+      - NEO4J_URI=bolt://neo4j:7687
+      - NEO4J_USER=neo4j
+      - NEO4J_PASSWORD=password123
+      - GRAPH_DB_TYPE=neo4j
+      # Disable ArangoDB
+      - ARANGODB_URL=http://localhost:8529
       - ARANGODB_DB=txt2kg
-      - PINECONE_HOST=entity-embeddings
-      - PINECONE_PORT=5081
-      - PINECONE_API_KEY=pclocal
-      - PINECONE_ENVIRONMENT=local
+      # vLLM configuration (GPU-accelerated)
+      - VLLM_BASE_URL=http://vllm:8001/v1
+      - VLLM_MODEL=nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8
+      # Disable Ollama
+      - OLLAMA_BASE_URL=http://localhost:11434/v1
+      - OLLAMA_MODEL=disabled
+      # Vector DB configuration
+      - QDRANT_URL=http://qdrant:6333
+      - VECTOR_DB_TYPE=qdrant
+      # Embeddings configuration
       - LANGCHAIN_TRACING_V2=true
       - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
       - MODEL_NAME=all-MiniLM-L6-v2
+      - EMBEDDINGS_API_URL=http://sentence-transformers:80
+      # Other settings
       - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
       - NODE_TLS_REJECT_UNAUTHORIZED=0
-      - OLLAMA_BASE_URL=http://ollama:11434/v1
-      - OLLAMA_MODEL=qwen3:1.7b
-      - VLLM_BASE_URL=http://vllm:8001/v1
-      - VLLM_MODEL=meta-llama/Llama-3.2-3B-Instruct
-      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
+      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
+      - NODE_OPTIONS=--max-http-header-size=80000
+      - UV_THREADPOOL_SIZE=128
+      - HTTP_TIMEOUT=1800000
+      - REQUEST_TIMEOUT=1800000
     networks:
-      - pinecone-net
       - default
       - txt2kg-network
+      - qdrant-net
     depends_on:
-      - arangodb
-      - entity-embeddings
-      - sentence-transformers
-      - vllm
-  arangodb:
-    image: arangodb:latest
-    ports:
-      - '8529:8529'
-    environment:
-      - ARANGO_NO_AUTH=1
-    volumes:
-      - arangodb_data:/var/lib/arangodb3
-      - arangodb_apps_data:/var/lib/arangodb3-apps
-  arangodb-init:
-    image: arangodb:latest
-    depends_on:
-      arangodb:
+      neo4j:
+        condition: service_healthy
+      vllm:
         condition: service_started
-    restart: on-failure
-    entrypoint: >
-      sh -c "
-      echo 'Waiting for ArangoDB to start...' &&
-      sleep 10 &&
-      echo 'Creating txt2kg database...' &&
-      arangosh --server.endpoint tcp://arangodb:8529 --server.authentication false --javascript.execute-string 'try { db._createDatabase(\"txt2kg\"); console.log(\"Database txt2kg created successfully!\"); } catch(e) { if(e.message.includes(\"duplicate\")) { console.log(\"Database txt2kg already exists\"); } else { throw e; } }'
-      "
-  entity-embeddings:
-    image: ghcr.io/pinecone-io/pinecone-index:latest
-    container_name: entity-embeddings
-    environment:
-      PORT: 5081
-      INDEX_TYPE: serverless
-      VECTOR_TYPE: dense
-      DIMENSION: 384
-      METRIC: cosine
-      INDEX_NAME: entity-embeddings
+
+  # Neo4j - Graph database
+  neo4j:
+    image: neo4j:5-community
     ports:
-      - "5081:5081"
-    platform: linux/amd64
-    networks:
-      - pinecone-net
-    restart: unless-stopped
-  sentence-transformers:
-    build:
-      context: ../../deploy/services/sentence-transformers
-      dockerfile: Dockerfile
-    ports:
-      - '8000:80'
+      - '7474:7474'
+      - '7687:7687'
     environment:
-      - MODEL_NAME=all-MiniLM-L6-v2
+      - NEO4J_AUTH=neo4j/password123
+      - NEO4J_server_memory_heap_initial__size=512m
+      - NEO4J_server_memory_heap_max__size=2G
+    volumes:
+      - neo4j_data:/data
+      - neo4j_logs:/logs
     networks:
       - default
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:7474 || exit 1"]
+      interval: 15s
+      timeout: 10s
+      retries: 10
+      start_period: 60s
+
+  # vLLM - GPU-accelerated LLM with unified memory support
   vllm:
     build:
-      context: ../../deploy/services/vllm
+      context: ../services/vllm
       dockerfile: Dockerfile
     container_name: vllm-service
     ports:
       - '8001:8001'
+    ipc: host
+    ulimits:
+      memlock: -1
+      stack: 67108864
+    shm_size: '16gb'
     environment:
-      # Model configuration
-      - VLLM_MODEL=meta-llama/Llama-3.2-3B-Instruct
+      - VLLM_MODEL=nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8
       - VLLM_TENSOR_PARALLEL_SIZE=1
-      - VLLM_MAX_MODEL_LEN=4096
+      - VLLM_MAX_MODEL_LEN=32768
       - VLLM_GPU_MEMORY_UTILIZATION=0.9
-      # NVfp4 quantization settings
-      - VLLM_QUANTIZATION=fp8
-      - VLLM_KV_CACHE_DTYPE=fp8
-      # Service configuration
+      - VLLM_MAX_NUM_SEQS=32
+      - VLLM_MAX_NUM_BATCHED_TOKENS=32768
+      - VLLM_KV_CACHE_DTYPE=auto
       - VLLM_PORT=8001
       - VLLM_HOST=0.0.0.0
-      # Performance tuning
       - CUDA_VISIBLE_DEVICES=0
       - NCCL_DEBUG=INFO
+      - CUDA_MANAGED_FORCE_DEVICE_ALLOC=1
+      - PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+      - VLLM_CPU_OFFLOAD_GB=0
     volumes:
      - vllm_models:/app/models
       - /tmp:/tmp
-      # Mount model cache for faster startup
       - ~/.cache/huggingface:/root/.cache/huggingface
     networks:
       - default
@@ -121,21 +120,75 @@ services:
               count: 1
               capabilities: [gpu]
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8001/v1/models"]
-      interval: 30s
-      timeout: 10s
-      retries: 5
-      start_period: 120s  # Longer start period for model loading
+      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
+      interval: 60s
+      timeout: 30s
+      retries: 30
+      start_period: 1800s
+
+  # Optional: Vector search services
+  sentence-transformers:
+    build:
+      context: ../services/sentence-transformers
+      dockerfile: Dockerfile
+    ports:
+      - '8000:80'
+    environment:
+      - MODEL_NAME=all-MiniLM-L6-v2
+    networks:
+      - default
+    restart: unless-stopped
+    profiles:
+      - vector-search
+
+  qdrant:
+    image: qdrant/qdrant:latest
+    container_name: qdrant
+    ports:
+      - "6333:6333"
+      - "6334:6334"
+    volumes:
+      - qdrant_data:/qdrant/storage
+    networks:
+      - qdrant-net
+    restart: unless-stopped
+    profiles:
+      - vector-search
+
+  qdrant-init:
+    image: curlimages/curl:latest
+    depends_on:
+      - qdrant
+    restart: "no"
+    entrypoint: /bin/sh
+    command:
+      - -c
+      - |
+        echo 'Waiting for Qdrant to start...'
+        sleep 5
+        curl -X PUT http://qdrant:6333/collections/entity-embeddings \
+          -H 'Content-Type: application/json' \
+          -d '{"vectors":{"size":384,"distance":"Cosine"}}' || true
+        curl -X PUT http://qdrant:6333/collections/document-embeddings \
+          -H 'Content-Type: application/json' \
+          -d '{"vectors":{"size":384,"distance":"Cosine"}}' || true
+        echo 'Collections created'
+    networks:
+      - qdrant-net
+    profiles:
+      - vector-search

 volumes:
-  arangodb_data:
-  arangodb_apps_data:
+  neo4j_data:
+  neo4j_logs:
   vllm_models:
+  qdrant_data:

 networks:
-  pinecone-net:
-    name: pinecone
   default:
     driver: bridge
   txt2kg-network:
     driver: bridge
+  qdrant-net:
+    name: qdrant-network
+
diff --git a/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml b/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml
index 07e9e7d..b099b40 100644
--- a/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml
+++ b/nvidia/txt2kg/assets/deploy/compose/docker-compose.yml
@@ -1,3 +1,12 @@
+# txt2kg Docker Compose - ArangoDB + Ollama (Default)
+#
+# Default stack tested and working on DGX Spark
+#
+# Usage:
+#   ./start.sh                  # Default: ArangoDB + Ollama
+#   ./start.sh --vector-search  # Add Qdrant + Sentence Transformers
+#
+# For Neo4j + vLLM, use: ./start.sh --vllm

 services:
   app:
@@ -7,21 +16,32 @@ services:
     ports:
       - '3001:3000'
     environment:
+      # ArangoDB configuration
       - ARANGODB_URL=http://arangodb:8529
       - ARANGODB_DB=txt2kg
+      - GRAPH_DB_TYPE=arangodb
+      # Disable Neo4j
+      - NEO4J_URI=bolt://localhost:7687
+      - NEO4J_USER=neo4j
+      - NEO4J_PASSWORD=password123
+      # Ollama configuration
+      - OLLAMA_BASE_URL=http://ollama:11434/v1
+      - OLLAMA_MODEL=llama3.1:8b
+      # Disable vLLM
+      - VLLM_BASE_URL=http://localhost:8001/v1
+      - VLLM_MODEL=disabled
+      # Vector DB configuration
       - QDRANT_URL=http://qdrant:6333
       - VECTOR_DB_TYPE=qdrant
+      # Embeddings configuration
       - LANGCHAIN_TRACING_V2=true
       - SENTENCE_TRANSFORMER_URL=http://sentence-transformers:80
       - MODEL_NAME=all-MiniLM-L6-v2
       - EMBEDDINGS_API_URL=http://sentence-transformers:80
+      # Other settings
       - GRPC_SSL_CIPHER_SUITES=HIGH+ECDSA:HIGH+aRSA
       - NODE_TLS_REJECT_UNAUTHORIZED=0
-      - OLLAMA_BASE_URL=http://ollama:11434/v1
-      - OLLAMA_MODEL=llama3.1:8b
-      - REMOTE_WEBGPU_SERVICE_URL=http://txt2kg-remote-webgpu:8083
       - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
-      # Node.js timeout configurations for large model processing
       - NODE_OPTIONS=--max-http-header-size=80000
       - UV_THREADPOOL_SIZE=128
       - HTTP_TIMEOUT=1800000
@@ -29,12 +49,14 @@ services:
     networks:
       - default
       - txt2kg-network
-      - pinecone-net
+      - qdrant-net
     depends_on:
-      - arangodb
-      - ollama
-      # Optional: sentence-transformers and entity-embeddings are only needed for vector search
-      # Traditional graph search works without these services
+      arangodb:
+        condition: service_started
+      ollama:
+        condition: service_started
+
+  # ArangoDB - Graph database
   arangodb:
     image: arangodb:latest
     ports:
@@ -44,6 +66,11 @@ services:
     volumes:
       - arangodb_data:/var/lib/arangodb3
       - arangodb_apps_data:/var/lib/arangodb3-apps
+    networks:
+      - default
+    restart: unless-stopped
+
+  # ArangoDB initialization - create database
   arangodb-init:
     image: arangodb:latest
     depends_on:
@@ -57,6 +84,10 @@ services:
       echo 'Creating txt2kg database...' &&
       arangosh --server.endpoint tcp://arangodb:8529 --server.authentication false --javascript.execute-string 'try { db._createDatabase(\"txt2kg\"); console.log(\"Database txt2kg created successfully!\"); } catch(e) { if(e.message.includes(\"duplicate\")) { console.log(\"Database txt2kg already exists\"); } else { throw e; } }'
       "
+    networks:
+      - default
+
+  # Ollama - Local LLM inference
   ollama:
     build:
       context: ../services/ollama
@@ -68,13 +99,16 @@ services:
     volumes:
       - ollama_data:/root/.ollama
     environment:
-      - NVIDIA_VISIBLE_DEVICES=all  # Make all GPUs visible to the container
-      - NVIDIA_DRIVER_CAPABILITIES=compute,utility  # Required capabilities for CUDA
-      - OLLAMA_FLASH_ATTENTION=1  # Enable flash attention for better performance
-      - OLLAMA_KEEP_ALIVE=30m  # Keep models loaded for 30 minutes
-      - OLLAMA_NUM_PARALLEL=4  # Process 4 requests in parallel - DGX Spark has unified memory
-      - OLLAMA_MAX_LOADED_MODELS=1  # Load only one model at a time to avoid VRAM contention
-      - OLLAMA_KV_CACHE_TYPE=q8_0  # Reduce KV cache VRAM usage with minimal performance impact
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+      - CUDA_VISIBLE_DEVICES=0
+      - OLLAMA_FLASH_ATTENTION=1
+      - OLLAMA_KEEP_ALIVE=30m
+      - OLLAMA_NUM_PARALLEL=4
+      - OLLAMA_MAX_LOADED_MODELS=1
+      - OLLAMA_KV_CACHE_TYPE=q8_0
+      - OLLAMA_GPU_LAYERS=-1
+      - OLLAMA_LLM_LIBRARY=cuda_v13
     networks:
       - default
     restart: unless-stopped
@@ -91,9 +125,8 @@ services:
       timeout: 10s
       retries: 3
       start_period: 60s
-
-  # Optional services for vector search (NOT required for traditional graph search)
-  # Traditional graph search works with just: app, arangodb, and ollama
+
+  # Optional: Vector search services
   sentence-transformers:
     build:
       context: ../services/sentence-transformers
@@ -106,7 +139,8 @@ services:
       - default
     restart: unless-stopped
     profiles:
-      - vector-search  # Only start with: docker compose --profile vector-search up
+      - vector-search
+
   qdrant:
     image: qdrant/qdrant:latest
     container_name: qdrant
@@ -116,10 +150,11 @@ services:
     volumes:
       - qdrant_data:/qdrant/storage
     networks:
-      - pinecone-net
+      - qdrant-net
     restart: unless-stopped
     profiles:
-      - vector-search  # Only start with: docker compose --profile vector-search up
+      - vector-search
+
   qdrant-init:
     image: curlimages/curl:latest
     depends_on:
@@ -131,32 +166,15 @@ services:
       - |
         echo 'Waiting for Qdrant to start...'
         sleep 5
-        echo 'Checking if entity-embeddings collection exists...'
-        RESPONSE=$(curl -s http://qdrant:6333/collections/entity-embeddings)
-        if echo "$RESPONSE" | grep -q '"status":"ok"'; then
-          echo 'entity-embeddings collection already exists'
-        else
-          echo 'Creating collection entity-embeddings...'
-          curl -X PUT http://qdrant:6333/collections/entity-embeddings \
-            -H 'Content-Type: application/json' \
-            -d '{"vectors":{"size":384,"distance":"Cosine"}}'
-          echo ''
-          echo 'entity-embeddings collection created successfully'
-        fi
-        echo 'Checking if document-embeddings collection exists...'
-        RESPONSE=$(curl -s http://qdrant:6333/collections/document-embeddings)
-        if echo "$RESPONSE" | grep -q '"status":"ok"'; then
-          echo 'document-embeddings collection already exists'
-        else
-          echo 'Creating collection document-embeddings...'
- curl -X PUT http://qdrant:6333/collections/document-embeddings \ - -H 'Content-Type: application/json' \ - -d '{"vectors":{"size":384,"distance":"Cosine"}}' - echo '' - echo 'document-embeddings collection created successfully' - fi + curl -X PUT http://qdrant:6333/collections/entity-embeddings \ + -H 'Content-Type: application/json' \ + -d '{"vectors":{"size":384,"distance":"Cosine"}}' || true + curl -X PUT http://qdrant:6333/collections/document-embeddings \ + -H 'Content-Type: application/json' \ + -d '{"vectors":{"size":384,"distance":"Cosine"}}' || true + echo 'Collections created' networks: - - pinecone-net + - qdrant-net profiles: - vector-search @@ -171,5 +189,5 @@ networks: driver: bridge txt2kg-network: driver: bridge - pinecone-net: - name: pinecone + qdrant-net: + name: qdrant-network diff --git a/nvidia/txt2kg/assets/deploy/services/vllm/Dockerfile b/nvidia/txt2kg/assets/deploy/services/vllm/Dockerfile index a07c298..9716fb7 100644 --- a/nvidia/txt2kg/assets/deploy/services/vllm/Dockerfile +++ b/nvidia/txt2kg/assets/deploy/services/vllm/Dockerfile @@ -1,5 +1,5 @@ -# Use NVIDIA Triton Inference Server with vLLM - optimized for latest NVIDIA hardware -FROM nvcr.io/nvidia/tritonserver:25.08-vllm-python-py3 +# Use official NVIDIA vLLM image - optimized for NVIDIA hardware +FROM nvcr.io/nvidia/vllm:25.11-py3 # Install curl for health checks RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/* diff --git a/nvidia/txt2kg/assets/deploy/services/vllm/launch_server.sh b/nvidia/txt2kg/assets/deploy/services/vllm/launch_server.sh index a9a09ae..1ec4424 100755 --- a/nvidia/txt2kg/assets/deploy/services/vllm/launch_server.sh +++ b/nvidia/txt2kg/assets/deploy/services/vllm/launch_server.sh @@ -21,17 +21,11 @@ # Enable unified memory usage for DGX Spark export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 -export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True +export PYTORCH_ALLOC_CONF=expandable_segments:True # Enable CUDA unified memory and 
oversubscription -export CUDA_VISIBLE_DEVICES=0 export PYTORCH_NO_CUDA_MEMORY_CACHING=0 -# Force vLLM to use CPU offloading for large models -export VLLM_CPU_OFFLOAD_GB=50 -export VLLM_ALLOW_RUNTIME_LORA_UPDATES_WITH_SGD_LORA=1 -export VLLM_SKIP_WARMUP=0 - # Optimized environment for performance export VLLM_LOGGING_LEVEL=INFO export PYTHONUNBUFFERED=1 @@ -39,8 +33,12 @@ export PYTHONUNBUFFERED=1 # Enable CUDA optimizations export VLLM_USE_MODELSCOPE=false -# Enable unified memory in vLLM -export VLLM_USE_V1=0 +# Enable FP8 MoE optimizations for Nemotron and other MoE models +export VLLM_USE_FLASHINFER_MOE_FP8=1 +export VLLM_USE_FLASHINFER_MOE_FP4=1 + +# Enable FlashInfer attention backend for better performance +export VLLM_ATTENTION_BACKEND=FLASHINFER # First, test basic CUDA functionality echo "=== Testing CUDA functionality ===" @@ -64,68 +62,89 @@ if torch.cuda.is_available(): " echo "=== Starting optimized vLLM server ===" -# Optimized configuration for DGX Spark performance with NVFP4 quantization -# Available quantized models from NVIDIA -NVFP4_MODEL="nvidia/Llama-3.3-70B-Instruct-FP4" -NVFP8_MODEL="nvidia/Llama-3.1-8B-Instruct-FP8" -STANDARD_MODEL="meta-llama/Llama-3.1-70B-Instruct" -# Check GPU compute capability for optimal quantization +# Check GPU compute capability for optimal settings COMPUTE_CAPABILITY=$(nvidia-smi -i 0 --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null || echo "unknown") echo "Detected GPU compute capability: $COMPUTE_CAPABILITY" -# Configure quantization based on GPU architecture -if [[ "$COMPUTE_CAPABILITY" == "12.1" ]] || [[ "$COMPUTE_CAPABILITY" == "10.0" ]]; then - # Blackwell/DGX Spark architecture - use standard 70B model with CPU offloading - echo "Using standard Llama-3.1-70B model for Blackwell/DGX Spark with CPU offloading" - QUANTIZATION_FLAG="" - MODEL_TO_USE="$STANDARD_MODEL" # Use standard 70B model - GPU_MEMORY_UTIL="0.7" # Lower GPU memory to allow unified memory - MAX_MODEL_LEN="4096" # Shorter 
sequences for memory efficiency - MAX_NUM_SEQS="16" # Lower concurrent sequences for 70B - MAX_BATCHED_TOKENS="4096" - CPU_OFFLOAD_GB="50" # Offload 50GB to CPU/unified memory -elif [[ "$COMPUTE_CAPABILITY" == "9.0" ]]; then - # Hopper architecture - use standard model - echo "Using standard 70B model for Hopper architecture" - QUANTIZATION_FLAG="" - MODEL_TO_USE="$STANDARD_MODEL" - GPU_MEMORY_UTIL="0.7" - MAX_MODEL_LEN="4096" - MAX_NUM_SEQS="16" - MAX_BATCHED_TOKENS="4096" - CPU_OFFLOAD_GB="40" +# Use environment variable if set, otherwise default to Qwen (not gated) +if [ -n "$VLLM_MODEL" ]; then + MODEL_TO_USE="$VLLM_MODEL" + echo "Using model from environment: $MODEL_TO_USE" else - # Other architectures - use standard precision - echo "Using standard 70B model for GPU architecture: $COMPUTE_CAPABILITY" - QUANTIZATION_FLAG="" - MODEL_TO_USE="$STANDARD_MODEL" - GPU_MEMORY_UTIL="0.7" - MAX_MODEL_LEN="2048" - MAX_NUM_SEQS="16" - MAX_BATCHED_TOKENS="2048" - CPU_OFFLOAD_GB="40" + # Default to Qwen 2.5 7B - not gated, no HuggingFace token required + MODEL_TO_USE="Qwen/Qwen2.5-7B-Instruct" + echo "Using default model: $MODEL_TO_USE" fi -echo "Using model: $MODEL_TO_USE" -echo "Quantization: ${QUANTIZATION_FLAG:-'disabled'}" +# Configure settings based on model size and GPU architecture +# Check if using 8B or smaller model +if [[ "$MODEL_TO_USE" == *"8B"* ]] || [[ "$MODEL_TO_USE" == *"7B"* ]] || [[ "$MODEL_TO_USE" == *"3B"* ]] || [[ "$MODEL_TO_USE" == *"1B"* ]]; then + echo "Configuring for smaller model (8B or less)" + QUANTIZATION_FLAG="" + GPU_MEMORY_UTIL="${VLLM_GPU_MEMORY_UTILIZATION:-0.9}" + MAX_MODEL_LEN="${VLLM_MAX_MODEL_LEN:-8192}" + MAX_NUM_SEQS="${VLLM_MAX_NUM_SEQS:-64}" + MAX_BATCHED_TOKENS="${VLLM_MAX_NUM_BATCHED_TOKENS:-8192}" + CPU_OFFLOAD_GB="${VLLM_CPU_OFFLOAD_GB:-0}" +elif [[ "$COMPUTE_CAPABILITY" == "12.1" ]] || [[ "$COMPUTE_CAPABILITY" == "10.0" ]]; then + # Blackwell/DGX Spark architecture with larger model - use CPU offloading + echo "Configuring 
for large model on Blackwell/DGX Spark with CPU offloading" + QUANTIZATION_FLAG="" + GPU_MEMORY_UTIL="${VLLM_GPU_MEMORY_UTILIZATION:-0.7}" + MAX_MODEL_LEN="${VLLM_MAX_MODEL_LEN:-4096}" + MAX_NUM_SEQS="${VLLM_MAX_NUM_SEQS:-16}" + MAX_BATCHED_TOKENS="${VLLM_MAX_NUM_BATCHED_TOKENS:-4096}" + CPU_OFFLOAD_GB="${VLLM_CPU_OFFLOAD_GB:-50}" +else + # Other architectures with larger model + echo "Configuring for large model on GPU architecture: $COMPUTE_CAPABILITY" + QUANTIZATION_FLAG="" + GPU_MEMORY_UTIL="${VLLM_GPU_MEMORY_UTILIZATION:-0.7}" + MAX_MODEL_LEN="${VLLM_MAX_MODEL_LEN:-4096}" + MAX_NUM_SEQS="${VLLM_MAX_NUM_SEQS:-16}" + MAX_BATCHED_TOKENS="${VLLM_MAX_NUM_BATCHED_TOKENS:-4096}" + CPU_OFFLOAD_GB="${VLLM_CPU_OFFLOAD_GB:-40}" +fi + +echo "" +echo "=== vLLM Configuration ===" +echo "Model: $MODEL_TO_USE" echo "GPU memory utilization: $GPU_MEMORY_UTIL" - +echo "Max model length: $MAX_MODEL_LEN" +echo "Max num seqs: $MAX_NUM_SEQS" +echo "Max batched tokens: $MAX_BATCHED_TOKENS" echo "CPU Offload: ${CPU_OFFLOAD_GB}GB" +echo "Quantization: ${QUANTIZATION_FLAG:-'none'}" +echo "" -vllm serve "$MODEL_TO_USE" \ +# Build command - only add cpu-offload-gb if > 0 +VLLM_CMD="vllm serve $MODEL_TO_USE \ --host 0.0.0.0 \ --port 8001 \ --tensor-parallel-size 1 \ - --max-model-len "$MAX_MODEL_LEN" \ - --max-num-seqs "$MAX_NUM_SEQS" \ - --max-num-batched-tokens "$MAX_BATCHED_TOKENS" \ - --gpu-memory-utilization "$GPU_MEMORY_UTIL" \ - --cpu-offload-gb "$CPU_OFFLOAD_GB" \ + --max-model-len $MAX_MODEL_LEN \ + --max-num-seqs $MAX_NUM_SEQS \ + --gpu-memory-utilization $GPU_MEMORY_UTIL \ --kv-cache-dtype auto \ --trust-remote-code \ - --served-model-name "$MODEL_TO_USE" \ - --enable-chunked-prefill \ - --disable-custom-all-reduce \ - --disable-async-output-proc \ - $QUANTIZATION_FLAG \ No newline at end of file + --served-model-name $MODEL_TO_USE" + +# Note: For FP8 models, vLLM auto-detects quantization from model config +# No need to specify --dtype float8 (not supported in vLLM 0.11.0) +if 
[[ "$MODEL_TO_USE" == *"FP8"* ]] || [[ "$MODEL_TO_USE" == *"fp8"* ]]; then + echo "Detected FP8 model - vLLM will auto-detect FP8 quantization from model config" +fi + +# Add CPU offload only for larger models +if [ "$CPU_OFFLOAD_GB" -gt 0 ] 2>/dev/null; then + VLLM_CMD="$VLLM_CMD --cpu-offload-gb $CPU_OFFLOAD_GB" +fi + +# Add quantization if specified +if [ -n "$QUANTIZATION_FLAG" ]; then + VLLM_CMD="$VLLM_CMD $QUANTIZATION_FLAG" +fi + +echo "Running: $VLLM_CMD" +exec $VLLM_CMD \ No newline at end of file diff --git a/nvidia/txt2kg/assets/frontend/README.md b/nvidia/txt2kg/assets/frontend/README.md index 1df5405..f341f89 100644 --- a/nvidia/txt2kg/assets/frontend/README.md +++ b/nvidia/txt2kg/assets/frontend/README.md @@ -18,7 +18,7 @@ This directory contains the Next.js frontend application for the txt2kg project. - **lib/**: Utility functions and shared logic - LLM service (Ollama, vLLM, NVIDIA API integration) - Graph database services (ArangoDB, Neo4j) - - Pinecone vector database integration + - Qdrant vector database integration - RAG service for knowledge graph querying - **public/**: Static assets - **types/**: TypeScript type definitions for graph data structures @@ -76,7 +76,7 @@ Required environment variables are configured in docker-compose files: - `OLLAMA_BASE_URL`: Ollama API endpoint - `VLLM_BASE_URL`: vLLM API endpoint (optional) - `NVIDIA_API_KEY`: NVIDIA API key (optional) -- `PINECONE_HOST`: Local Pinecone host (optional) +- `QDRANT_URL`: Qdrant vector database URL (optional) - `SENTENCE_TRANSFORMER_URL`: Embeddings service URL (optional) ## Features @@ -86,4 +86,4 @@ Required environment variables are configured in docker-compose files: - **RAG Queries**: Query knowledge graphs with retrieval-augmented generation - **Multiple LLM Providers**: Support for Ollama, vLLM, and NVIDIA API - **GPU-Accelerated Rendering**: Optional PyGraphistry integration for large graphs -- **Vector Search**: Pinecone integration for semantic search \ No newline at 
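The rewritten launcher makes two branching decisions: a name-based small-model heuristic (any `1B`/`3B`/`7B`/`8B` tag in the model name) and conditional command assembly, where `--cpu-offload-gb` is appended only for a non-zero budget. Both can be sketched as standalone functions (function names are illustrative; the defaults mirror the script's 64-vs-16 `MAX_NUM_SEQS` split):

```shell
#!/bin/sh
# Name-based heuristic from the launcher: a model counts as "small" when
# its name carries a 1B/3B/7B/8B size tag.
is_small_model() {
  case "$1" in
    *1B*|*3B*|*7B*|*8B*) return 0 ;;
    *) return 1 ;;
  esac
}

# Mirrors the launcher's MAX_NUM_SEQS defaults: 64 for small models, 16 otherwise.
default_max_num_seqs() {
  if is_small_model "$1"; then echo 64; else echo 16; fi
}

# Conditional flag assembly as in the rebuilt VLLM_CMD: the offload flag
# is only appended when the budget is greater than zero.
build_serve_cmd() {
  model="$1"; offload_gb="$2"
  cmd="vllm serve $model --host 0.0.0.0 --port 8001"
  if [ "$offload_gb" -gt 0 ] 2>/dev/null; then
    cmd="$cmd --cpu-offload-gb $offload_gb"
  fi
  echo "$cmd"
}
```

Note the heuristic's limits: `Qwen/Qwen2.5-7B-Instruct` is classified small via its `7B` tag, and `...-49B-...` correctly falls through to the large-model branch, but any stray `7B`/`8B` substring in an unrelated part of a model name would flip the result.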
end of file +- **Vector Search**: Qdrant integration for semantic search \ No newline at end of file diff --git a/nvidia/txt2kg/assets/frontend/app/api/backend/route.ts b/nvidia/txt2kg/assets/frontend/app/api/backend/route.ts index 0103ff7..7413853 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/backend/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/backend/route.ts @@ -21,7 +21,7 @@ import { getGraphDbType } from '../settings/route'; /** * Remote backend API that provides endpoints for creating and querying a knowledge graph - * using the selected graph database, Pinecone, and SentenceTransformer + * using the selected graph database, Qdrant, and SentenceTransformer */ /** diff --git a/nvidia/txt2kg/assets/frontend/app/api/embeddings/route.ts b/nvidia/txt2kg/assets/frontend/app/api/embeddings/route.ts index e6038e5..84b0266 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/embeddings/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/embeddings/route.ts @@ -56,24 +56,24 @@ export async function POST(request: NextRequest) { console.log(`Generated ${embeddings.length} embeddings`); // Initialize QdrantService - const pineconeService = QdrantService.getInstance(); + const qdrantService = QdrantService.getInstance(); // Check if Qdrant server is running - const isPineconeRunning = await pineconeService.isQdrantRunning(); - if (!isPineconeRunning) { + const isQdrantRunning = await qdrantService.isQdrantRunning(); + if (!isQdrantRunning) { return NextResponse.json( { error: 'Qdrant server is not available. Please make sure it is running.' }, { status: 503 } ); } - if (!pineconeService.isInitialized()) { + if (!qdrantService.isInitialized()) { try { - await pineconeService.initialize(); + await qdrantService.initialize(); } catch (initError) { - console.error('Error initializing Pinecone:', initError); + console.error('Error initializing Qdrant:', initError); return NextResponse.json( - { error: `Failed to initialize Pinecone: ${initError instanceof Error ? 
initError.message : String(initError)}` }, + { error: `Failed to initialize Qdrant: ${initError instanceof Error ? initError.message : String(initError)}` }, { status: 500 } ); } @@ -89,13 +89,13 @@ export async function POST(request: NextRequest) { textContent.set(chunkIds[i], chunks[i]); } - // Store embeddings in PineconeService with retry logic + // Store embeddings in Qdrant with retry logic try { - await pineconeService.storeEmbeddings(entityEmbeddings, textContent); + await qdrantService.storeEmbeddings(entityEmbeddings, textContent); } catch (storeError) { - console.error('Error storing embeddings in Pinecone:', storeError); + console.error('Error storing embeddings in Qdrant:', storeError); return NextResponse.json( - { error: `Failed to store embeddings in Pinecone: ${storeError instanceof Error ? storeError.message : String(storeError)}` }, + { error: `Failed to store embeddings in Qdrant: ${storeError instanceof Error ? storeError.message : String(storeError)}` }, { status: 500 } ); } diff --git a/nvidia/txt2kg/assets/frontend/app/api/extract-triples/route.ts b/nvidia/txt2kg/assets/frontend/app/api/extract-triples/route.ts index 0e82e80..e596d50 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/extract-triples/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/extract-triples/route.ts @@ -132,9 +132,9 @@ export async function POST(req: NextRequest) { }, body: JSON.stringify({ text, - model: vllmModel || 'meta-llama/Llama-3.2-3B-Instruct', + model: vllmModel || process.env.VLLM_MODEL || 'nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8', temperature: 0.1, - maxTokens: 8192 + maxTokens: 4096 // Reduced to leave room for input tokens in context }) }); diff --git a/nvidia/txt2kg/assets/frontend/app/api/graph-db/route.ts b/nvidia/txt2kg/assets/frontend/app/api/graph-db/route.ts index 91bb57f..eb44784 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/graph-db/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/graph-db/route.ts @@ -88,13 +88,18 @@ async 
function ensureConnection(request?: NextRequest): Promise { * GET handler for retrieving graph data from the selected graph database */ export async function GET(request: NextRequest) { + console.log('[graph-db GET] Request received'); try { // Initialize with connection parameters + console.log('[graph-db GET] Ensuring connection...'); const graphDbType = await ensureConnection(request); + console.log(`[graph-db GET] Using database type: ${graphDbType}`); const graphDbService = getGraphDbService(graphDbType); // Get graph data from the database + console.log('[graph-db GET] Fetching graph data...'); const graphData = await graphDbService.getGraphData(); + console.log(`[graph-db GET] Got ${graphData.nodes.length} nodes, ${graphData.relationships.length} relationships`); // Transform to format expected by the frontend const nodes = graphData.nodes.map(node => ({ diff --git a/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts b/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts index 66dbe1f..4989b9a 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts @@ -30,7 +30,7 @@ export async function GET(request: NextRequest) { // Initialize services with the correct graph database type const graphDbType = getGraphDbType(); const graphDbService = getGraphDbService(graphDbType); - const pineconeService = QdrantService.getInstance(); + const qdrantService = QdrantService.getInstance(); // Initialize graph database if needed if (!graphDbService.isInitialized()) { @@ -60,7 +60,7 @@ export async function GET(request: NextRequest) { // Get total triples (relationships) const totalTriples = graphData.relationships.length; - // Get vector stats from Pinecone if available + // Get vector stats from Qdrant if available let vectorStats = { totalVectors: 0, avgQueryTime: 0, @@ -68,8 +68,8 @@ export async function GET(request: NextRequest) { }; try { - await pineconeService.initialize(); - const stats = 
await pineconeService.getStats(); + await qdrantService.initialize(); + const stats = await qdrantService.getStats(); vectorStats = { totalVectors: stats.totalVectorCount || 0, @@ -77,7 +77,7 @@ export async function GET(request: NextRequest) { avgRelevanceScore: stats.averageRelevanceScore || 0 }; } catch (error) { - console.warn('Could not fetch Pinecone stats:', error); + console.warn('Could not fetch Qdrant stats:', error); } // Get real query logs instead of mock data diff --git a/nvidia/txt2kg/assets/frontend/app/api/ollama/route.ts b/nvidia/txt2kg/assets/frontend/app/api/ollama/route.ts index 819d3c3..7399ead 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/ollama/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/ollama/route.ts @@ -57,7 +57,7 @@ export async function POST(req: NextRequest) { console.log(`[${new Date().toISOString()}] /api/ollama: POST request received`); try { - const { text, model = 'qwen3:1.7b', temperature = 0.1, maxTokens = 8192 } = await req.json(); + const { text, model = 'qwen3:1.7b', temperature = 0.1, maxTokens = 4096 } = await req.json(); console.log(`[${new Date().toISOString()}] /api/ollama: Parsed body - model: ${model}, text length: ${text?.length || 0}, maxTokens: ${maxTokens}`); if (!text || typeof text !== 'string') { diff --git a/nvidia/txt2kg/assets/frontend/app/api/ollama/tags/route.ts b/nvidia/txt2kg/assets/frontend/app/api/ollama/tags/route.ts new file mode 100644 index 0000000..6e44ef6 --- /dev/null +++ b/nvidia/txt2kg/assets/frontend/app/api/ollama/tags/route.ts @@ -0,0 +1,32 @@ +// +// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 +// +import { NextResponse } from 'next/server'; + +/** + * Fetch available models from Ollama + * GET /api/ollama/tags + */ +export async function GET() { + const ollamaUrl = process.env.OLLAMA_BASE_URL || 'http://ollama:11434/v1'; + // Convert /v1 URL to base URL for tags endpoint + const baseUrl = ollamaUrl.replace('/v1', ''); + + try { + const response = await fetch(`${baseUrl}/api/tags`, { + signal: AbortSignal.timeout(5000), + }); + + if (!response.ok) { + return NextResponse.json({ models: [] }, { status: 200 }); + } + + const data = await response.json(); + return NextResponse.json(data); + } catch (error) { + // Return empty models array if Ollama is not available + return NextResponse.json({ models: [] }, { status: 200 }); + } +} + diff --git a/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/clear/route.ts b/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/clear/route.ts index d2491b7..f73d7cf 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/clear/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/clear/route.ts @@ -1,21 +1,5 @@ -// -// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
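The new tags route derives Ollama's native endpoint by removing the OpenAI-compatible `/v1` suffix from `OLLAMA_BASE_URL`. A shell equivalent using suffix stripping (slightly stricter than the route's `String.replace('/v1', '')`, which removes the first `/v1` anywhere in the string; `to_tags_url` is an illustrative name):

```shell
# Derive Ollama's native tags endpoint from the OpenAI-compatible base
# URL, as the new /api/ollama/tags route does. `${1%/v1}` strips only a
# trailing /v1, so a /v1 elsewhere in the URL is left alone.
to_tags_url() {
  echo "${1%/v1}/api/tags"
}
```

For the compose default, `to_tags_url http://ollama:11434/v1` yields `http://ollama:11434/api/tags`; a base URL without the suffix passes through unchanged.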
-// import { NextRequest, NextResponse } from 'next/server'; -import { QdrantService } from '@/lib/qdrant'; +import { PineconeService } from '@/lib/pinecone'; /** * Clear all data from the Pinecone vector database @@ -23,7 +7,7 @@ import { QdrantService } from '@/lib/qdrant'; */ export async function POST() { // Get the Pinecone service instance - const pineconeService = QdrantService.getInstance(); + const pineconeService = PineconeService.getInstance(); // Clear all vectors from the database const deleteSuccess = await pineconeService.deleteAllEntities(); diff --git a/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/create-index/route.ts b/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/create-index/route.ts index 749dacc..7ce0f5c 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/create-index/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/create-index/route.ts @@ -1,21 +1,5 @@ -// -// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// import { NextResponse } from 'next/server'; -import { QdrantService } from '@/lib/qdrant'; +import { PineconeService } from '@/lib/pinecone'; /** * Create Pinecone index API endpoint @@ -24,7 +8,7 @@ import { QdrantService } from '@/lib/qdrant'; export async function POST() { try { // Get the Pinecone service instance - const pineconeService = QdrantService.getInstance(); + const pineconeService = PineconeService.getInstance(); // Force re-initialization to create the index (pineconeService as any).initialized = false; diff --git a/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/stats/route.ts b/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/stats/route.ts index e9159e0..a1aa129 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/stats/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/pinecone-diag/stats/route.ts @@ -1,21 +1,5 @@ -// -// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// import { NextRequest, NextResponse } from 'next/server'; -import { QdrantService } from '@/lib/qdrant'; +import { PineconeService } from '@/lib/pinecone'; /** * Get Pinecone vector database stats @@ -23,7 +7,7 @@ import { QdrantService } from '@/lib/qdrant'; export async function GET() { try { // Initialize Pinecone service - const pineconeService = QdrantService.getInstance(); + const pineconeService = PineconeService.getInstance(); // We can now directly call getStats() which handles initialization and error recovery const stats = await pineconeService.getStats(); diff --git a/nvidia/txt2kg/assets/frontend/app/api/rag-query/route.ts b/nvidia/txt2kg/assets/frontend/app/api/rag-query/route.ts index 474544f..db0b1fa 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/rag-query/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/rag-query/route.ts @@ -19,7 +19,7 @@ import RAGService from '@/lib/rag'; /** * API endpoint for RAG-based question answering - * Uses Pinecone for document retrieval and LangChain for generation + * Uses Qdrant for document retrieval and LangChain for generation * POST /api/rag-query */ export async function POST(req: NextRequest) { diff --git a/nvidia/txt2kg/assets/frontend/app/api/sentence-embeddings/route.ts b/nvidia/txt2kg/assets/frontend/app/api/sentence-embeddings/route.ts index 7fc1905..aff933d 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/sentence-embeddings/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/sentence-embeddings/route.ts @@ -51,7 +51,7 @@ export async function POST(req: NextRequest) { // Optionally store in vector database if (sentenceEmbeddings.length > 0) { try { - // Map the embeddings to a format suitable for Pinecone + // Map the embeddings to a format suitable for Qdrant const embeddingsMap = new Map(); const textContentMap = new Map(); const metadataMap = new Map(); @@ -64,9 +64,9 @@ export async function POST(req: NextRequest) { metadataMap.set(key, item.metadata); }); - // Store in Pinecone - const 
pineconeService = QdrantService.getInstance(); - await pineconeService.storeEmbeddingsWithMetadata( + // Store in Qdrant + const qdrantService = QdrantService.getInstance(); + await qdrantService.storeEmbeddingsWithMetadata( embeddingsMap, textContentMap, metadataMap diff --git a/nvidia/txt2kg/assets/frontend/app/api/settings/route.ts b/nvidia/txt2kg/assets/frontend/app/api/settings/route.ts index 5875395..cbb25cd 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/settings/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/settings/route.ts @@ -17,8 +17,26 @@ import { NextRequest, NextResponse } from 'next/server'; import { GraphDBType } from '@/lib/graph-db-service'; -// In-memory storage for settings +// In-memory storage for settings - use lazy initialization for env vars +// because they're not available at build time, only at runtime let serverSettings: Record = {}; +let settingsInitialized = false; + +function ensureSettingsInitialized() { + if (!settingsInitialized) { + // Read environment variables at runtime, not build time + serverSettings = { + graph_db_type: process.env.GRAPH_DB_TYPE || 'arangodb', + neo4j_uri: process.env.NEO4J_URI || '', + neo4j_user: process.env.NEO4J_USER || process.env.NEO4J_USERNAME || '', + neo4j_password: process.env.NEO4J_PASSWORD || '', + arangodb_url: process.env.ARANGODB_URL || '', + arangodb_db: process.env.ARANGODB_DB || '', + }; + settingsInitialized = true; + console.log(`[SETTINGS] Initialized at runtime with GRAPH_DB_TYPE: "${serverSettings.graph_db_type}"`); + } +} /** * API Route to sync client settings with server environment variables @@ -27,13 +45,16 @@ let serverSettings: Record = {}; */ export async function POST(request: NextRequest) { try { + // Ensure settings are initialized from env vars first + ensureSettingsInitialized(); + const { settings } = await request.json(); if (!settings || typeof settings !== 'object') { return NextResponse.json({ error: 'Settings object is required' }, { status: 400 }); } - 
// Update server settings + // Update server settings (merge with existing) serverSettings = { ...serverSettings, ...settings }; // Log some important settings for debugging @@ -58,6 +79,9 @@ export async function POST(request: NextRequest) { */ export async function GET(request: NextRequest) { try { + // Ensure settings are initialized from env vars first + ensureSettingsInitialized(); + const url = new URL(request.url); const key = url.searchParams.get('key'); @@ -84,12 +108,32 @@ export async function GET(request: NextRequest) { * For use in other API routes */ export function getSetting(key: string): string | null { + ensureSettingsInitialized(); return serverSettings[key] || null; } /** * Get the currently selected graph database type + * Priority: serverSettings > environment variable > default 'arangodb' */ export function getGraphDbType(): GraphDBType { - return (serverSettings.graph_db_type as GraphDBType) || 'arangodb'; + // Ensure settings are initialized from runtime environment variables + ensureSettingsInitialized(); + + // Check serverSettings (initialized from env vars or updated by client) + if (serverSettings.graph_db_type) { + console.log(`[getGraphDbType] Returning: "${serverSettings.graph_db_type}"`); + return serverSettings.graph_db_type as GraphDBType; + } + + // Direct fallback to runtime environment variable + const envType = process.env.GRAPH_DB_TYPE; + if (envType) { + console.log(`[getGraphDbType] Returning from env: "${envType}"`); + return envType as GraphDBType; + } + + // Default to arangodb for backwards compatibility + console.log(`[getGraphDbType] Returning default: "arangodb"`); + return 'arangodb'; } \ No newline at end of file diff --git a/nvidia/txt2kg/assets/frontend/app/api/vector-db/clear/route.ts b/nvidia/txt2kg/assets/frontend/app/api/vector-db/clear/route.ts new file mode 100644 index 0000000..da81119 --- /dev/null +++ b/nvidia/txt2kg/assets/frontend/app/api/vector-db/clear/route.ts @@ -0,0 +1,44 @@ +// +// 
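The lazily initialized `getGraphDbType()` above resolves its value in a fixed order: synced client setting, then the runtime `GRAPH_DB_TYPE` environment variable, then the `'arangodb'` default. The same precedence in shell parameter-expansion form (`resolve_graph_db_type` is a hypothetical name, not part of the codebase):

```shell
# Same precedence as getGraphDbType(): explicit setting > GRAPH_DB_TYPE
# env var > 'arangodb' default.
resolve_graph_db_type() {
  setting="$1"  # value synced from the client; may be empty
  echo "${setting:-${GRAPH_DB_TYPE:-arangodb}}"
}
```

This is also why the route reads env vars lazily: `${GRAPH_DB_TYPE:-arangodb}` must be evaluated at request time, not when the module (or image) is built.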
SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +import { NextRequest, NextResponse } from 'next/server'; +import { QdrantService } from '@/lib/qdrant'; + +/** + * Clear all data from the Qdrant vector database + * POST /api/vector-db/clear + */ +export async function POST() { + // Get the Qdrant service instance + const qdrantService = QdrantService.getInstance(); + + // Clear all vectors from the database + const deleteSuccess = await qdrantService.deleteAllEntities(); + + // Get updated stats after clearing + const stats = await qdrantService.getStats(); + + // Return response based on operation success + return NextResponse.json({ + success: deleteSuccess, + message: deleteSuccess + ? 
'Successfully cleared all data from Qdrant vector database' + : 'Failed to clear Qdrant database - service may not be available', + totalVectorCount: stats.totalVectorCount || 0, + httpHealthy: stats.httpHealthy || false + }); +} + diff --git a/nvidia/txt2kg/assets/frontend/app/api/vector-db/create-collection/route.ts b/nvidia/txt2kg/assets/frontend/app/api/vector-db/create-collection/route.ts new file mode 100644 index 0000000..8fc58cc --- /dev/null +++ b/nvidia/txt2kg/assets/frontend/app/api/vector-db/create-collection/route.ts @@ -0,0 +1,53 @@ +// +// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +import { NextResponse } from 'next/server'; +import { QdrantService } from '@/lib/qdrant'; + +/** + * Create Qdrant collection API endpoint + * POST /api/vector-db/create-collection + */ +export async function POST() { + try { + // Get the Qdrant service instance + const qdrantService = QdrantService.getInstance(); + + // Force re-initialization to create the collection + (qdrantService as any).initialized = false; + await qdrantService.initialize(); + + // Check if initialization was successful by getting stats + const stats = await qdrantService.getStats(); + + return NextResponse.json({ + success: true, + message: 'Qdrant collection created successfully', + httpHealthy: stats.httpHealthy || false + }); + } catch (error) { + console.error('Error creating Qdrant collection:', error); + + return NextResponse.json( + { + success: false, + error: `Failed to create Qdrant collection: ${error instanceof Error ? error.message : String(error)}` + }, + { status: 500 } + ); + } +} + diff --git a/nvidia/txt2kg/assets/frontend/app/api/vector-db/stats/route.ts b/nvidia/txt2kg/assets/frontend/app/api/vector-db/stats/route.ts new file mode 100644 index 0000000..bbc44b9 --- /dev/null +++ b/nvidia/txt2kg/assets/frontend/app/api/vector-db/stats/route.ts @@ -0,0 +1,59 @@ +// +// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +import { NextRequest, NextResponse } from 'next/server'; +import { QdrantService } from '@/lib/qdrant'; + +/** + * Get Qdrant vector database stats + */ +export async function GET() { + try { + // Initialize Qdrant service + const qdrantService = QdrantService.getInstance(); + + // We can now directly call getStats() which handles initialization and error recovery + const stats = await qdrantService.getStats(); + + return NextResponse.json({ + ...stats, + timestamp: new Date().toISOString() + }); + } catch (error) { + console.error('Error getting Qdrant stats:', error); + + // Return a successful response with error information + // This prevents the UI from breaking when Qdrant is unavailable + let errorMessage = error instanceof Error ? error.message : String(error); + + // More specific error message for 404 errors + if (errorMessage.includes('404')) { + errorMessage = 'Qdrant server returned 404. The server may not be running or the collection does not exist.'; + } + + return NextResponse.json( + { + error: `Failed to get Qdrant stats: ${errorMessage}`, + totalVectorCount: 0, + source: 'error', + httpHealthy: false, + timestamp: new Date().toISOString() + }, + { status: 200 } // Use 200 instead of 500 to avoid UI errors + ); + } +} + diff --git a/nvidia/txt2kg/assets/frontend/app/api/vllm/models/route.ts b/nvidia/txt2kg/assets/frontend/app/api/vllm/models/route.ts new file mode 100644 index 0000000..be31811 --- /dev/null +++ b/nvidia/txt2kg/assets/frontend/app/api/vllm/models/route.ts @@ -0,0 +1,40 @@ +// +// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
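The stats route above deliberately answers with HTTP 200 plus an error payload so the UI keeps rendering when Qdrant is down. The error-to-payload mapping it performs can be isolated as a pure function; this is a sketch with a hypothetical helper name, not code from the patch:

```typescript
interface DegradedStats {
  error: string;
  totalVectorCount: number;
  source: 'error';
  httpHealthy: boolean;
}

// Map a caught error to the degraded stats payload the route returns,
// specializing the message for 404s (server not running / missing collection).
function degradedStatsPayload(error: unknown): DegradedStats {
  let message = error instanceof Error ? error.message : String(error);
  if (message.includes('404')) {
    message = 'Qdrant server returned 404. The server may not be running or the collection does not exist.';
  }
  return {
    error: `Failed to get Qdrant stats: ${message}`,
    totalVectorCount: 0,
    source: 'error',
    httpHealthy: false,
  };
}
```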
+// SPDX-License-Identifier: Apache-2.0 +// +import { NextResponse } from 'next/server'; + +/** + * Fetch available models from vLLM + * GET /api/vllm/models + */ +export async function GET() { + const vllmUrl = process.env.VLLM_BASE_URL || 'http://vllm:8001/v1'; + + try { + const response = await fetch(`${vllmUrl}/models`, { + signal: AbortSignal.timeout(5000), + }); + + if (!response.ok) { + return NextResponse.json({ models: [] }, { status: 200 }); + } + + const data = await response.json(); + + // vLLM returns OpenAI-compatible format: { data: [{ id: "model-name", ... }] } + if (data.data && Array.isArray(data.data)) { + const models = data.data.map((model: any) => ({ + id: model.id, + name: model.id, + })); + return NextResponse.json({ models }); + } + + return NextResponse.json({ models: [] }); + } catch (error) { + // Return empty models array if vLLM is not available + return NextResponse.json({ models: [] }, { status: 200 }); + } +} + diff --git a/nvidia/txt2kg/assets/frontend/app/api/vllm/route.ts b/nvidia/txt2kg/assets/frontend/app/api/vllm/route.ts index 0a72b3e..25742f0 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/vllm/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/vllm/route.ts @@ -86,7 +86,7 @@ export async function GET(req: NextRequest) { */ export async function POST(req: NextRequest) { try { - const { text, model = 'meta-llama/Llama-3.2-3B-Instruct', temperature = 0.1, maxTokens = 1024 } = await req.json(); + const { text, model = process.env.VLLM_MODEL || 'nvidia/Llama-3_3-Nemotron-Super-49B-v1_5-FP8', temperature = 0.1, maxTokens = 1024 } = await req.json(); if (!text || typeof text !== 'string') { return NextResponse.json({ error: 'Text is required' }, { status: 400 }); diff --git a/nvidia/txt2kg/assets/frontend/app/globals.css b/nvidia/txt2kg/assets/frontend/app/globals.css index 74dc189..944b4e3 100644 --- a/nvidia/txt2kg/assets/frontend/app/globals.css +++ b/nvidia/txt2kg/assets/frontend/app/globals.css @@ -397,3 +397,88 @@ body 
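The `/api/vllm/models` route above flattens vLLM's OpenAI-compatible `{ data: [{ id }] }` response into the `{ id, name }` pairs the frontend consumes. That normalization can be factored as a pure function (names here are illustrative):

```typescript
interface VllmModelsResponse { data?: Array<{ id: string }> }
interface UiModel { id: string; name: string }

// Flatten the OpenAI-compatible /v1/models shape into UI model entries;
// any payload without a data array normalizes to an empty list, matching
// the route's behavior when vLLM is unreachable or returns an error.
function normalizeVllmModels(payload: VllmModelsResponse): UiModel[] {
  if (!payload.data || !Array.isArray(payload.data)) return [];
  return payload.data.map((m) => ({ id: m.id, name: m.id }));
}
```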
{ /* Light mode: tune specific custom elements */ .light .glass-card:hover { box-shadow: 0 10px 18px -8px rgba(0,0,0,0.12) !important; } .light .startup-tab-icon { box-shadow: 0 1px 3px rgba(0,0,0,0.06) !important; } + +/* Progress bar indeterminate animation - smooth sliding with gradient shine */ +@keyframes progress { + 0% { + width: 0%; + margin-left: 0%; + } + 50% { + width: 40%; + margin-left: 30%; + } + 100% { + width: 0%; + margin-left: 100%; + } +} + +.animate-progress { + animation: progress 1.8s ease-in-out infinite; +} + +/* Progress bar shimmer effect for determinate progress */ +@keyframes shimmer { + 0% { + transform: translateX(-100%); + } + 100% { + transform: translateX(100%); + } +} + +.progress-shimmer { + position: relative; + overflow: hidden; +} + +.progress-shimmer::after { + content: ""; + position: absolute; + inset: 0; + background: linear-gradient( + 90deg, + transparent 0%, + rgba(255, 255, 255, 0.15) 50%, + transparent 100% + ); + animation: shimmer 2s ease-in-out infinite; +} + +/* Enhanced skeleton shimmer with directional sweep */ +@keyframes skeleton-shimmer { + 0% { + background-position: -200% 0; + } + 100% { + background-position: 200% 0; + } +} + +.skeleton-shimmer { + background: linear-gradient( + 90deg, + hsl(var(--muted)) 25%, + hsl(var(--muted-foreground) / 0.08) 50%, + hsl(var(--muted)) 75% + ); + background-size: 200% 100%; + animation: skeleton-shimmer 1.5s ease-in-out infinite; +} + +/* Pulse animation for status indicators */ +@keyframes status-pulse { + 0%, 100% { + opacity: 1; + transform: scale(1); + } + 50% { + opacity: 0.6; + transform: scale(0.95); + } +} + +.status-pulse { + animation: status-pulse 2s ease-in-out infinite; +} diff --git a/nvidia/txt2kg/assets/frontend/app/page.tsx b/nvidia/txt2kg/assets/frontend/app/page.tsx index 2289d69..bb9bc58 100644 --- a/nvidia/txt2kg/assets/frontend/app/page.tsx +++ b/nvidia/txt2kg/assets/frontend/app/page.tsx @@ -46,7 +46,6 @@ export default function Home() { { value: 
"edit", label: "Edit Knowledge Graph", Icon: Edit }, { value: "visualize", label: "Visualize Graph", Icon: Network }, ] as const; - const activeIndex = Math.max(0, steps.findIndex(s => s.value === activeTab)); // Updated to use callback reference const handleTabChange = React.useCallback((tab: string) => { @@ -84,8 +83,8 @@ export default function Home() {
- - + + {steps.map(({ value, label, Icon }) => ( {/* Step 1: Document Upload */} - + {/* Step 2: Configure & Process */} - + {/* Step 3: Edit Knowledge */} - + {/* Step 4: Visualize Knowledge Graph */} - + diff --git a/nvidia/txt2kg/assets/frontend/app/rag/page.tsx b/nvidia/txt2kg/assets/frontend/app/rag/page.tsx index f5d7186..f3b0399 100644 --- a/nvidia/txt2kg/assets/frontend/app/rag/page.tsx +++ b/nvidia/txt2kg/assets/frontend/app/rag/page.tsx @@ -68,7 +68,7 @@ export default function RagPage() { } // Check if vector search is available - const vectorResponse = await fetch('/api/pinecone-diag/stats'); + const vectorResponse = await fetch('/api/vector-db/stats'); if (vectorResponse.ok) { const data = await vectorResponse.json(); setVectorEnabled(data.totalVectorCount > 0); @@ -112,7 +112,7 @@ export default function RagPage() { }); try { - // If using pure RAG (Pinecone + LangChain) without graph search + // If using pure RAG (Qdrant + LangChain) without graph search if (params.usePureRag) { queryMode = 'pure-rag'; try { diff --git a/nvidia/txt2kg/assets/frontend/components/advanced-options.tsx b/nvidia/txt2kg/assets/frontend/components/advanced-options.tsx index 167179f..f2421ba 100644 --- a/nvidia/txt2kg/assets/frontend/components/advanced-options.tsx +++ b/nvidia/txt2kg/assets/frontend/components/advanced-options.tsx @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -import React, { useState } from "react"; -import { ChevronDown, ChevronRight } from "lucide-react"; +import React, { useState, useRef, useEffect } from "react"; +import { ChevronDown } from "lucide-react"; import { cn } from "@/lib/utils"; interface AdvancedOptionsProps { @@ -32,28 +32,57 @@ export function AdvancedOptions({ defaultOpen = false }: AdvancedOptionsProps) { const [isOpen, setIsOpen] = useState(defaultOpen); + const contentRef = useRef(null); + const [contentHeight, setContentHeight] = useState( + defaultOpen ? undefined : 0 + ); + + // Update content height when open state changes + useEffect(() => { + if (isOpen) { + const height = contentRef.current?.scrollHeight; + setContentHeight(height); + // After animation completes, set to auto for dynamic content + const timer = setTimeout(() => setContentHeight(undefined), 200); + return () => clearTimeout(timer); + } else { + // First set to current height, then to 0 for smooth collapse + setContentHeight(contentRef.current?.scrollHeight); + requestAnimationFrame(() => setContentHeight(0)); + } + }, [isOpen]); return (
-
setIsOpen(!isOpen)} + aria-expanded={isOpen} + aria-controls="advanced-options-content" >

- {isOpen ? ( - - ) : ( - - )} + {title}

-
+ - {isOpen && ( +
{children}
- )} +
); } \ No newline at end of file diff --git a/nvidia/txt2kg/assets/frontend/components/database-connection.tsx b/nvidia/txt2kg/assets/frontend/components/database-connection.tsx index 4ddaa35..d68d6eb 100644 --- a/nvidia/txt2kg/assets/frontend/components/database-connection.tsx +++ b/nvidia/txt2kg/assets/frontend/components/database-connection.tsx @@ -57,24 +57,34 @@ export function DatabaseConnection({ className }: DatabaseConnectionProps) { setGraphError(null) try { - // Get database type from localStorage - const graphDbType = localStorage.getItem("graph_db_type") || "arangodb" + // Get database type from localStorage, fall back to fetching from server + let graphDbType = localStorage.getItem("graph_db_type") + if (!graphDbType) { + // Fetch server's default (from GRAPH_DB_TYPE env var) + try { + const settingsRes = await fetch('/api/settings') + const settingsData = await settingsRes.json() + graphDbType = settingsData.settings?.graph_db_type || 'neo4j' + } catch { + graphDbType = 'neo4j' + } + } setDbType(graphDbType === "arangodb" ? "ArangoDB" : "Neo4j") if (graphDbType === "neo4j") { - // Neo4j connection logic + // Neo4j connection logic - use the unified graph-db endpoint const dbUrl = localStorage.getItem("NEO4J_URL") const dbUsername = localStorage.getItem("NEO4J_USERNAME") const dbPassword = localStorage.getItem("NEO4J_PASSWORD") - // Add query parameters if credentials exist + // Add query parameters with type=neo4j const queryParams = new URLSearchParams() + queryParams.append("type", "neo4j") if (dbUrl) queryParams.append("url", dbUrl) if (dbUsername) queryParams.append("username", dbUsername) if (dbPassword) queryParams.append("password", dbPassword) - const queryString = queryParams.toString() - const endpoint = queryString ? 
`/api/neo4j?${queryString}` : '/api/neo4j' + const endpoint = `/api/graph-db?${queryParams.toString()}` const response = await fetch(endpoint) @@ -98,21 +108,21 @@ export function DatabaseConnection({ className }: DatabaseConnectionProps) { setConnectionUrl(dbUrl) } } else { - // ArangoDB connection logic + // ArangoDB connection logic - use the unified graph-db endpoint with type=arangodb const arangoUrl = localStorage.getItem("arango_url") || "http://localhost:8529" const arangoDb = localStorage.getItem("arango_db") || "txt2kg" const arangoUser = localStorage.getItem("arango_user") || "" const arangoPassword = localStorage.getItem("arango_password") || "" - // Add query parameters if credentials exist + // Add query parameters with type=arangodb const queryParams = new URLSearchParams() + queryParams.append("type", "arangodb") if (arangoUrl) queryParams.append("url", arangoUrl) if (arangoDb) queryParams.append("dbName", arangoDb) if (arangoUser) queryParams.append("username", arangoUser) if (arangoPassword) queryParams.append("password", arangoPassword) - const queryString = queryParams.toString() - const endpoint = queryString ? `/api/graph-db?${queryString}` : '/api/graph-db' + const endpoint = `/api/graph-db?${queryParams.toString()}` const response = await fetch(endpoint) @@ -144,7 +154,8 @@ export function DatabaseConnection({ className }: DatabaseConnectionProps) { // Disconnect from graph database const disconnectGraph = async () => { try { - const graphDbType = localStorage.getItem("graph_db_type") || "arangodb" + // Use current dbType state which was already determined from server/localStorage + const graphDbType = dbType === "Neo4j" ? "neo4j" : "arangodb" const endpoint = graphDbType === "neo4j" ? 
'/api/neo4j/disconnect' : '/api/graph-db/disconnect' const response = await fetch(endpoint, { @@ -171,7 +182,7 @@ export function DatabaseConnection({ className }: DatabaseConnectionProps) { // Fetch vector DB stats const fetchVectorStats = async () => { try { - const response = await fetch('/api/pinecone-diag/stats'); + const response = await fetch('/api/vector-db/stats'); const data = await response.json(); if (response.ok) { @@ -273,7 +284,7 @@ export function DatabaseConnection({ className }: DatabaseConnectionProps) { try { // Call API to clear the database - const response = await fetch('/api/pinecone-diag/clear', { + const response = await fetch('/api/vector-db/clear', { method: 'POST', }) diff --git a/nvidia/txt2kg/assets/frontend/components/documents-table.tsx b/nvidia/txt2kg/assets/frontend/components/documents-table.tsx index 020ae5c..b0f535e 100644 --- a/nvidia/txt2kg/assets/frontend/components/documents-table.tsx +++ b/nvidia/txt2kg/assets/frontend/components/documents-table.tsx @@ -28,6 +28,16 @@ import { DialogHeader, DialogTitle, } from "@/components/ui/dialog" +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from "@/components/ui/alert-dialog" import { Button } from "@/components/ui/button" import type { Triple } from "@/utils/text-processing" import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip" @@ -44,6 +54,10 @@ export function DocumentsTable({ onTabChange }: DocumentsTableProps) { const [currentDocumentId, setCurrentDocumentId] = useState(null) const [editableTriples, setEditableTriples] = useState([]) const [editingTripleIndex, setEditingTripleIndex] = useState(null) + + // Delete confirmation dialog state + const [showDeleteDialog, setShowDeleteDialog] = useState(false) + const [deleteTarget, setDeleteTarget] = useState<{ type: 'single' | 'multiple', docId?: string, 
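The connection logic above now always tags requests to the unified `/api/graph-db` endpoint with an explicit `type` parameter and appends only the credentials that exist. A minimal sketch of that URL construction, with an assumed helper name and parameter shape:

```typescript
// Build the unified graph-db endpoint URL: the backend type is always sent,
// credentials and the ArangoDB database name only when they are set.
function graphDbEndpoint(
  type: 'neo4j' | 'arangodb',
  creds: { url?: string; dbName?: string; username?: string; password?: string },
): string {
  const params = new URLSearchParams();
  params.append('type', type);
  if (creds.url) params.append('url', creds.url);
  if (type === 'arangodb' && creds.dbName) params.append('dbName', creds.dbName);
  if (creds.username) params.append('username', creds.username);
  if (creds.password) params.append('password', creds.password);
  return `/api/graph-db?${params.toString()}`;
}
```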
docName?: string } | null>(null) // Use shift-select hook for document selection const { @@ -63,11 +77,32 @@ export function DocumentsTable({ onTabChange }: DocumentsTableProps) { const handleDeleteSelected = () => { if (selectedDocuments.length === 0) return - - if (confirm(`Are you sure you want to delete ${selectedDocuments.length} selected document(s)?`)) { + setDeleteTarget({ type: 'multiple' }) + setShowDeleteDialog(true) + } + + const handleConfirmDelete = () => { + if (!deleteTarget) return + + if (deleteTarget.type === 'multiple') { deleteDocuments(selectedDocuments) setSelectedDocuments([]) + toast({ + title: "Documents Deleted", + description: `Successfully deleted ${selectedDocuments.length} document(s).`, + duration: 3000, + }) + } else if (deleteTarget.type === 'single' && deleteTarget.docId) { + deleteDocuments([deleteTarget.docId]) + toast({ + title: "Document Deleted", + description: `"${deleteTarget.docName}" has been deleted.`, + duration: 3000, + }) } + + setShowDeleteDialog(false) + setDeleteTarget(null) } const openTriplesDialog = (documentId: string) => { @@ -249,6 +284,7 @@ export function DocumentsTable({ onTabChange }: DocumentsTableProps) { openTriplesDialog(doc.id); }} className="p-2 text-nvidia-green hover:bg-nvidia-green/10 rounded-lg transition-colors" + aria-label={`View and edit ${doc.triples?.length || 0} triples for ${doc.name}`} title="View and edit triples" > @@ -269,6 +305,7 @@ export function DocumentsTable({ onTabChange }: DocumentsTableProps) { // Create a simple info modal or tooltip showing document details }} className="p-2 text-muted-foreground hover:text-nvidia-green hover:bg-nvidia-green/10 rounded-lg transition-colors" + aria-label={`View info for ${doc.name}`} title="View document info" > @@ -294,6 +331,7 @@ export function DocumentsTable({ onTabChange }: DocumentsTableProps) { } }} className="p-2 text-muted-foreground hover:text-nvidia-green hover:bg-nvidia-green/10 rounded-lg transition-colors" + 
aria-label={`Download ${doc.name}`} title="Download document" > @@ -301,11 +339,11 @@ export function DocumentsTable({ onTabChange }: DocumentsTableProps) { - - + +
+ + + + + +

{getProcessTooltip()}

+
+
+ + + + + + +

{getViewGraphTooltip()}

+
+
+
+
) } diff --git a/nvidia/txt2kg/assets/frontend/components/llm-selector-compact.tsx b/nvidia/txt2kg/assets/frontend/components/llm-selector-compact.tsx index 59f9b60..4d6832f 100644 --- a/nvidia/txt2kg/assets/frontend/components/llm-selector-compact.tsx +++ b/nvidia/txt2kg/assets/frontend/components/llm-selector-compact.tsx @@ -17,7 +17,7 @@ "use client" import { useState, useEffect } from "react" -import { ChevronDown, Cpu } from "lucide-react" +import { ChevronDown, Cpu, Server, RefreshCw } from "lucide-react" import { OllamaIcon } from "@/components/ui/ollama-icon" interface LLMModel { @@ -28,15 +28,8 @@ interface LLMModel { description?: string } -// Default models -const DEFAULT_MODELS: LLMModel[] = [ - { - id: "ollama-llama3.1:8b", - name: "Llama 3.1 8B", - model: "llama3.1:8b", - provider: "ollama", - description: "Local Ollama model" - }, +// NVIDIA API models (always available if API key is set) +const NVIDIA_MODELS: LLMModel[] = [ { id: "nvidia-nemotron-super", name: "Nemotron Super 49B", @@ -54,51 +47,100 @@ const DEFAULT_MODELS: LLMModel[] = [ ] export function LLMSelectorCompact() { - const [models, setModels] = useState(DEFAULT_MODELS) - const [selectedModel, setSelectedModel] = useState(DEFAULT_MODELS[0]) + const [models, setModels] = useState([]) + const [selectedModel, setSelectedModel] = useState(null) const [isOpen, setIsOpen] = useState(false) + const [isLoading, setIsLoading] = useState(true) - // Load Ollama models from settings - useEffect(() => { - try { - const selectedOllamaModels = localStorage.getItem("selected_ollama_models") - if (selectedOllamaModels) { - const modelNames: string[] = JSON.parse(selectedOllamaModels) - const ollamaModels: LLMModel[] = modelNames.map(name => ({ - id: `ollama-${name}`, - name: name, - model: name, - provider: "ollama", - description: "Local Ollama model" - })) - - // Combine with default models, avoiding duplicates - const defaultOllamaIds = DEFAULT_MODELS - .filter(m => m.provider === "ollama") - .map(m 
=> m.model) - const uniqueOllamaModels = ollamaModels.filter( - m => !defaultOllamaIds.includes(m.model) - ) - - const allModels = [...DEFAULT_MODELS, ...uniqueOllamaModels] - setModels(allModels) - } - } catch (error) { - console.error("Error loading Ollama models:", error) - } - }, []) + // Fetch available models from running backends + const fetchAvailableModels = async () => { + setIsLoading(true) + const availableModels: LLMModel[] = [] - // Load selected model from localStorage - useEffect(() => { + // Check vLLM first (port 8001) try { - const saved = localStorage.getItem("selectedModelForRAG") - if (saved) { - const savedModel: LLMModel = JSON.parse(saved) - setSelectedModel(savedModel) + const vllmResponse = await fetch('/api/vllm/models', { + signal: AbortSignal.timeout(3000) + }) + if (vllmResponse.ok) { + const data = await vllmResponse.json() + if (data.models && Array.isArray(data.models)) { + data.models.forEach((model: any) => { + const modelId = model.id || model.name || model + availableModels.push({ + id: `vllm-${modelId}`, + name: modelId.split('/').pop() || modelId, + model: modelId, + provider: "vllm", + description: "vLLM (GPU-accelerated)" + }) + }) + } } - } catch (error) { - console.error("Error loading selected model:", error) + } catch (e) { + // vLLM not available + console.log("vLLM not available") } + + // Check Ollama (port 11434) + try { + const ollamaResponse = await fetch('/api/ollama/tags', { + signal: AbortSignal.timeout(3000) + }) + if (ollamaResponse.ok) { + const data = await ollamaResponse.json() + if (data.models && Array.isArray(data.models)) { + data.models.forEach((model: any) => { + const modelName = model.name || model + availableModels.push({ + id: `ollama-${modelName}`, + name: modelName, + model: modelName, + provider: "ollama", + description: "Local Ollama model" + }) + }) + } + } + } catch (e) { + // Ollama not available + console.log("Ollama not available") + } + + // Always add NVIDIA API models + 
availableModels.push(...NVIDIA_MODELS) + + setModels(availableModels) + + // Set default selected model + if (availableModels.length > 0) { + // Try to restore saved selection + try { + const saved = localStorage.getItem("selectedModelForRAG") + if (saved) { + const savedModel: LLMModel = JSON.parse(saved) + const found = availableModels.find(m => m.id === savedModel.id) + if (found) { + setSelectedModel(found) + setIsLoading(false) + return + } + } + } catch (e) { + // Ignore + } + + // Default to first available local model (vLLM or Ollama), not NVIDIA API + const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama") + setSelectedModel(localModel || availableModels[0]) + } + + setIsLoading(false) + } + + // Fetch models on mount + useEffect(() => { + fetchAvailableModels() }, []) // Save selected model to localStorage and dispatch event @@ -117,14 +159,55 @@ export function LLMSelectorCompact() { if (provider === "ollama") { return } + if (provider === "vllm") { + return + } return } + const getProviderLabel = (provider: string) => { + switch (provider) { + case "ollama": return "Ollama" + case "vllm": return "vLLM" + case "nvidia": return "NVIDIA API" + default: return provider + } + } + + if (isLoading) { + return ( +
+ + Loading models... +
+ ) + } + + if (!selectedModel) { + return ( +
+ No models available +
+ ) + } + + // Group models by provider + const groupedModels = models.reduce((acc, model) => { + if (!acc[model.provider]) { + acc[model.provider] = [] + } + acc[model.provider].push(model) + return acc + }, {} as Record) + return (
-
- {models.map((model) => ( -
- {selectedModel.id === model.id && ( -
- )} - +
+
+ {model.name} +
+ {model.description && ( +
+ {model.description} +
+ )} +
+ {selectedModel.id === model.id && ( +
+ )} + + ))} +
))}
@@ -180,4 +287,3 @@ export function LLMSelectorCompact() { ) } - diff --git a/nvidia/txt2kg/assets/frontend/components/model-selector.tsx b/nvidia/txt2kg/assets/frontend/components/model-selector.tsx index 0812c18..9bf463c 100644 --- a/nvidia/txt2kg/assets/frontend/components/model-selector.tsx +++ b/nvidia/txt2kg/assets/frontend/components/model-selector.tsx @@ -17,12 +17,22 @@ "use client" import { useState, useEffect, useRef } from "react" -import { createPortal } from "react-dom" -import { ChevronDown, Sparkles, Cpu, Server } from "lucide-react" +import { ChevronDown, Cpu, Server, RefreshCw } from "lucide-react" import { OllamaIcon } from "@/components/ui/ollama-icon" -// Base models - NVIDIA NeMo as default (first in list) -const baseModels = [ +interface Model { + id: string + name: string + icon: React.ReactNode + description: string + model: string + baseURL: string + provider: string + apiKeyName?: string +} + +// NVIDIA API models (always available) +const NVIDIA_MODELS: Model[] = [ { id: "nvidia-nemotron", name: "NVIDIA Llama 3.3 Nemotron Super 49B", @@ -31,6 +41,7 @@ const baseModels = [ model: "nvidia/llama-3.3-nemotron-super-49b-v1.5", apiKeyName: "NVIDIA_API_KEY", baseURL: "https://integrate.api.nvidia.com/v1", + provider: "nvidia", }, { id: "nvidia-nemotron-nano", @@ -40,68 +51,116 @@ const baseModels = [ model: "nvidia/nvidia-nemotron-nano-9b-v2", apiKeyName: "NVIDIA_API_KEY", baseURL: "https://integrate.api.nvidia.com/v1", - }, - // Preset Ollama model - { - id: "ollama-llama3.1:8b", - name: "Ollama llama3.1:8b", - icon: , - description: "Local Ollama server with llama3.1:8b model", - model: "llama3.1:8b", - baseURL: "http://localhost:11434/v1", - provider: "ollama", + provider: "nvidia", }, ] -// vLLM models removed per user request - -// Helper function to create Ollama model objects -const createOllamaModel = (modelName: string) => ({ +// Helper to create model objects +const createOllamaModel = (modelName: string): Model => ({ id: 
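Both selector components above group the fetched model list by provider with a `reduce` before rendering section headers. The grouping step, isolated as a generic helper for illustration:

```typescript
// Group a flat model list into per-provider buckets, preserving the
// discovery order (vLLM first, then Ollama, then NVIDIA API models).
function groupByProvider<T extends { provider: string }>(
  models: T[],
): Record<string, T[]> {
  return models.reduce((acc, model) => {
    (acc[model.provider] ??= []).push(model);
    return acc;
  }, {} as Record<string, T[]>);
}
```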
`ollama-${modelName}`, name: `Ollama ${modelName}`, icon: , - description: `Local Ollama server with ${modelName} model`, + description: `Local Ollama model`, model: modelName, baseURL: "http://localhost:11434/v1", provider: "ollama", }) +const createVllmModel = (modelName: string): Model => ({ + id: `vllm-${modelName}`, + name: modelName.split('/').pop() || modelName, + icon: , + description: "vLLM (GPU-accelerated)", + model: modelName, + baseURL: "http://localhost:8001/v1", + provider: "vllm", +}) + export function ModelSelector() { - const [models, setModels] = useState(() => [...baseModels]) - const [selectedModel, setSelectedModel] = useState(() => { - // Try to find a default Ollama model first - const defaultOllama = models.find(m => m.provider === "ollama") - return defaultOllama || models[0] - }) + const [models, setModels] = useState([]) + const [selectedModel, setSelectedModel] = useState(null) const [isOpen, setIsOpen] = useState(false) + const [isLoading, setIsLoading] = useState(true) const buttonRef = useRef(null) const containerRef = useRef(null) const [mounted, setMounted] = useState(false) - // Load configured Ollama models - const loadOllamaModels = () => { + // Fetch available models from running backends + const fetchAvailableModels = async () => { + setIsLoading(true) + const availableModels: Model[] = [] + + // Check vLLM first (port 8001) try { - const selectedOllamaModels = localStorage.getItem("selected_ollama_models") - if (selectedOllamaModels) { - const modelNames = JSON.parse(selectedOllamaModels) - // Filter out models that are already in baseModels to avoid duplicates - const baseModelNames = baseModels.filter(m => m.provider === "ollama").map(m => m.model) - const filteredModelNames = modelNames.filter((name: string) => !baseModelNames.includes(name)) - const ollamaModels = filteredModelNames.map(createOllamaModel) - const newModels = [...baseModels, ...ollamaModels] - setModels(newModels) - return newModels + const vllmResponse = 
await fetch('/api/vllm/models', { + signal: AbortSignal.timeout(3000) + }) + if (vllmResponse.ok) { + const data = await vllmResponse.json() + if (data.models && Array.isArray(data.models)) { + data.models.forEach((model: any) => { + const modelId = model.id || model.name || model + availableModels.push(createVllmModel(modelId)) + }) + } } - } catch (error) { - console.error("Error loading Ollama models:", error) + } catch (e) { + console.log("vLLM not available") } - // Return base models if no Ollama models configured - return [...baseModels] + + // Check Ollama (port 11434) + try { + const ollamaResponse = await fetch('/api/ollama/tags', { + signal: AbortSignal.timeout(3000) + }) + if (ollamaResponse.ok) { + const data = await ollamaResponse.json() + if (data.models && Array.isArray(data.models)) { + data.models.forEach((model: any) => { + const modelName = model.name || model + availableModels.push(createOllamaModel(modelName)) + }) + } + } + } catch (e) { + console.log("Ollama not available") + } + + // Always add NVIDIA API models + availableModels.push(...NVIDIA_MODELS) + + setModels(availableModels) + + // Set default selected model + if (availableModels.length > 0) { + // Try to restore saved selection + try { + const saved = localStorage.getItem("selectedModel") + if (saved) { + const savedModel = JSON.parse(saved) + const found = availableModels.find(m => m.id === savedModel.id) + if (found) { + setSelectedModel(found) + setIsLoading(false) + return + } + } + } catch (e) { + // Ignore + } + + // Default to first available local model (vLLM or Ollama) + const localModel = availableModels.find(m => m.provider === "vllm" || m.provider === "ollama") + setSelectedModel(localModel || availableModels[0]) + } + + setIsLoading(false) } // Dispatch custom event when model changes - const updateSelectedModel = (model: any) => { + const updateSelectedModel = (model: Model) => { setSelectedModel(model) + localStorage.setItem("selectedModel", JSON.stringify(model)) // 
Dispatch a custom event with the selected model data const event = new CustomEvent('modelSelected', { @@ -110,59 +169,11 @@ export function ModelSelector() { window.dispatchEvent(event) } + // Fetch models on mount useEffect(() => { - // Save selected model to localStorage - localStorage.setItem("selectedModel", JSON.stringify(selectedModel)) - }, [selectedModel]) - - // Initialize models and selected model - useEffect(() => { - const loadedModels = loadOllamaModels() - - // Try to restore selected model from localStorage - const savedModel = localStorage.getItem("selectedModel") - if (savedModel) { - try { - const parsed = JSON.parse(savedModel) - // Find matching model in our current models array - const matchingModel = loadedModels.find(m => m.id === parsed.id) - if (matchingModel) { - updateSelectedModel(matchingModel) - } else { - // If saved model not found, use first available model - updateSelectedModel(loadedModels[0]) - } - } catch (e) { - console.error("Error parsing saved model", e) - updateSelectedModel(loadedModels[0]) - } - } else { - // If no model in localStorage, use first available model - updateSelectedModel(loadedModels[0]) - } + fetchAvailableModels() }, []) - // Listen for Ollama model updates - useEffect(() => { - const handleOllamaUpdate = (event: CustomEvent) => { - console.log("Ollama models updated, reloading...") - const newModels = loadOllamaModels() - - // Check if current selected model still exists - const currentModelStillExists = newModels.find(m => m.id === selectedModel.id) - if (!currentModelStillExists) { - // Select first available model if current one is no longer available - updateSelectedModel(newModels[0]) - } - } - - window.addEventListener('ollama-models-updated', handleOllamaUpdate as EventListener) - - return () => { - window.removeEventListener('ollama-models-updated', handleOllamaUpdate as EventListener) - } - }, [selectedModel.id]) - // Set mounted state after component mounts (for SSR compatibility) useEffect(() 
=> { setMounted(true) @@ -186,6 +197,55 @@ export function ModelSelector() { } }, []) + // Listen for Ollama model updates + useEffect(() => { + const handleOllamaUpdate = () => { + console.log("Ollama models updated, reloading...") + fetchAvailableModels() + } + + window.addEventListener('ollama-models-updated', handleOllamaUpdate) + + return () => { + window.removeEventListener('ollama-models-updated', handleOllamaUpdate) + } + }, []) + + if (isLoading) { + return ( +
+ + Loading models... +
+ ) + } + + if (!selectedModel) { + return ( +
+ No models available +
+ ) + } + + // Group models by provider + const groupedModels = models.reduce((acc, model) => { + if (!acc[model.provider]) { + acc[model.provider] = [] + } + acc[model.provider].push(model) + return acc + }, {} as Record) + + const getProviderLabel = (provider: string) => { + switch (provider) { + case "ollama": return "Ollama (Local)" + case "vllm": return "vLLM (GPU-accelerated)" + case "nvidia": return "NVIDIA API (Cloud)" + default: return provider + } + } + return (
- +
+ Select Model + +
+
+ {Object.entries(groupedModels).map(([provider, providerModels]) => ( +
+
+ {getProviderLabel(provider)} +
+
+ {providerModels.map((model) => ( +
+ +
+ ))} +
+
))} - +
)} ) } - diff --git a/nvidia/txt2kg/assets/frontend/components/pinecone-connection.tsx b/nvidia/txt2kg/assets/frontend/components/pinecone-connection.tsx index b64ea4d..4d74753 100644 --- a/nvidia/txt2kg/assets/frontend/components/pinecone-connection.tsx +++ b/nvidia/txt2kg/assets/frontend/components/pinecone-connection.tsx @@ -1,19 +1,3 @@ -// -// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// "use client" import { useState, useEffect } from "react" @@ -103,7 +87,7 @@ export function PineconeConnection({ className }: PineconeConnectionProps) { -

Qdrant stores vector embeddings for semantic search

+

Local Pinecone stores vector embeddings in memory for semantic search

@@ -125,34 +109,34 @@ export function PineconeConnection({ className }: PineconeConnectionProps) {

Error: {error}

{error.includes('404') && (

- The Qdrant server is running but the collection doesn't exist yet. -
Or using Docker Compose: - docker compose restart qdrant + docker compose restart pinecone

)} @@ -160,25 +144,13 @@ export function PineconeConnection({ className }: PineconeConnectionProps) {
- Qdrant - {(stats as any).url || 'http://qdrant:6333'} + Vectors: + {stats.nodes}
- Vectors: - {stats.nodes} indexed + Source: + {stats.source} local
- {(stats as any).status && ( -
- Status: - {(stats as any).status} -
- )} - {(stats as any).vectorSize && ( -
- Dimensions: - {(stats as any).vectorSize}d ({(stats as any).distance}) -
- )}
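Both connection components in this patch follow the same pattern: fetch a stats endpoint and derive the connected/disconnected state from the payload. Below is a minimal, hedged sketch of that derivation logic, assuming the `/api/vector-db/stats` payload shape visible in the diff (`totalVectorCount`, `httpHealthy`, `source`, `error` are taken from the handler code, not from any published schema):

```typescript
// Sketch of the status-derivation step used by the connection components.
// Field names mirror the /api/vector-db/stats payload as it appears in the diff.
interface StatsResponse {
  totalVectorCount?: number;
  source?: string;
  httpHealthy?: boolean;
  error?: string;
}

type ConnectionStatus = "connected" | "disconnected";

function deriveConnectionStatus(data: StatsResponse): {
  status: ConnectionStatus;
  nodes: number;
  error: string | null;
} {
  // Guard against missing or non-numeric counts, as the component does.
  const nodes = typeof data.totalVectorCount === "number" ? data.totalVectorCount : 0;
  if (data.httpHealthy) {
    return { status: "connected", nodes, error: null };
  }
  return { status: "disconnected", nodes, error: data.error ?? "Connection failed" };
}

console.log(deriveConnectionStatus({ totalVectorCount: 42, httpHealthy: true }));
// → { status: 'connected', nodes: 42, error: null }
```

Keeping this derivation pure (no `setState` inside it) makes the connected/checking/disconnected transitions easy to unit-test independently of React.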
diff --git a/nvidia/txt2kg/assets/frontend/components/qdrant-connection.tsx b/nvidia/txt2kg/assets/frontend/components/qdrant-connection.tsx new file mode 100644 index 0000000..c8d69ed --- /dev/null +++ b/nvidia/txt2kg/assets/frontend/components/qdrant-connection.tsx @@ -0,0 +1,207 @@ +// +// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +"use client" + +import { useState, useEffect } from "react" +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { InfoIcon } from 'lucide-react' +import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip' +import { VectorDBStats } from '@/types/graph' + +interface QdrantConnectionProps { + className?: string +} + +export function QdrantConnection({ className }: QdrantConnectionProps) { + const [connectionStatus, setConnectionStatus] = useState<"connected" | "disconnected" | "checking">("disconnected") + const [error, setError] = useState(null) + const [stats, setStats] = useState({ nodes: 0, relationships: 0, source: 'none' }) + + // Fetch vector DB stats + const fetchStats = async () => { + try { + const response = await fetch('/api/vector-db/stats'); + const data = await response.json(); + + if (response.ok) { + setStats({ + nodes: typeof data.totalVectorCount === 'number' ? 
data.totalVectorCount : 0, + relationships: 0, // Vector DB doesn't store relationships + source: data.source || 'unknown', + httpHealthy: data.httpHealthy + }); + + // If we have a healthy HTTP connection, we're connected + if (data.httpHealthy) { + setConnectionStatus("connected"); + setError(null); + } else { + setConnectionStatus("disconnected"); + setError(data.error || 'Connection failed'); + } + + console.log('Vector DB stats:', data); + } else { + console.error('Failed to fetch vector DB stats:', data); + setConnectionStatus("disconnected"); + setError(data.error || 'Failed to connect to vector database'); + } + } catch (error) { + console.error('Error fetching vector DB stats:', error); + setConnectionStatus("disconnected"); + setError(error instanceof Error ? error.message : 'Error connecting to vector database'); + } + }; + + // Check connection status and stats + const checkConnection = async () => { + setConnectionStatus("checking") + setError(null) + + try { + await fetchStats(); // Fetch stats directly - our status is based on having embeddings + } catch (error) { + console.error('Error connecting to Vector DB:', error) + setConnectionStatus("disconnected") + setError(error instanceof Error ? error.message : 'Unknown error connecting to Vector DB') + } + } + + // Reset connection state + const disconnect = async () => { + setConnectionStatus("disconnected") + setStats({ nodes: 0, relationships: 0, source: 'none' }) + } + + // Initial connection check + useEffect(() => { + checkConnection() + }, []) + + return ( +
+
+

Vector DB

+ + + + + + +

Qdrant stores vector embeddings for semantic search

+
+
+
+
+ +
+ Status: + {connectionStatus === "connected" ? ( + Connected + ) : connectionStatus === "checking" ? ( + Checking... + ) : ( + Disconnected + )} +
+ + {error && ( +
+

Error: {error}

+ {error.includes('404') && ( +

+ The Qdrant server is running but the collection doesn't exist yet. + +
+ Or using Docker Compose: + docker compose restart qdrant +

+ )} +
+ )} + +
+
+ Qdrant + {(stats as any).url || 'http://qdrant:6333'} +
+
+ Vectors: + {stats.nodes} indexed +
+ {(stats as any).status && ( +
+ Status: + {(stats as any).status} +
+ )} + {(stats as any).vectorSize && ( +
+ Dimensions: + {(stats as any).vectorSize}d ({(stats as any).distance}) +
+ )} +
+ +
+ + + {connectionStatus === "connected" && ( + + )} +
+
+ ) +} + diff --git a/nvidia/txt2kg/assets/frontend/components/rag-query.tsx b/nvidia/txt2kg/assets/frontend/components/rag-query.tsx index 5a44114..dd7423b 100644 --- a/nvidia/txt2kg/assets/frontend/components/rag-query.tsx +++ b/nvidia/txt2kg/assets/frontend/components/rag-query.tsx @@ -156,16 +156,21 @@ export function RagQuery({ : 'border-border/30 opacity-50 cursor-not-allowed' }`} > -
- +
+
- Pure RAG + Pure RAG Vector DB + LLM {queryMode === 'pure-rag' && (
)} + {!vectorEnabled && ( +
+ NEEDS EMBEDDINGS +
+ )} @@ -668,44 +675,22 @@ export function SettingsModal() {
- + setPineconeApiKey(e.target.value)} - placeholder="Enter your Pinecone API key" + type="text" + value={qdrantUrl} + onChange={(e) => setQdrantUrl(e.target.value)} + placeholder="http://localhost:6333" className="w-full bg-background border border-border/60 rounded-md p-2 text-sm text-foreground focus:ring-1 focus:ring-primary/50 focus:border-primary transition-colors" />
-
-
- - setPineconeEnvironment(e.target.value)} - placeholder="us-west1-gcp" - className="w-full bg-background border border-border/60 rounded-md p-2 text-sm text-foreground focus:ring-1 focus:ring-primary/50 focus:border-primary transition-colors" - /> -
-
- - setPineconeIndex(e.target.value)} - placeholder="knowledge-graph" - className="w-full bg-background border border-border/60 rounded-md p-2 text-sm text-foreground focus:ring-1 focus:ring-primary/50 focus:border-primary transition-colors" - /> -
-
diff --git a/nvidia/txt2kg/assets/frontend/components/theme-toggle.tsx b/nvidia/txt2kg/assets/frontend/components/theme-toggle.tsx index 4305821..aecabcd 100644 --- a/nvidia/txt2kg/assets/frontend/components/theme-toggle.tsx +++ b/nvidia/txt2kg/assets/frontend/components/theme-toggle.tsx @@ -21,12 +21,16 @@ import { useTheme } from "./theme-provider" export function ThemeToggle() { const { theme, setTheme } = useTheme() + + const nextTheme = theme === "dark" ? "light" : "dark" + const label = `Switch to ${nextTheme} theme (currently ${theme})` return ( -
diff --git a/nvidia/txt2kg/assets/frontend/components/triple-viewer.tsx b/nvidia/txt2kg/assets/frontend/components/triple-viewer.tsx index 6497e0c..19ce41a 100644 --- a/nvidia/txt2kg/assets/frontend/components/triple-viewer.tsx +++ b/nvidia/txt2kg/assets/frontend/components/triple-viewer.tsx @@ -19,8 +19,18 @@ import { useState, useEffect, useRef } from "react" import { useDocuments } from "@/contexts/document-context" import type { Triple } from "@/utils/text-processing" -import { Pencil, Trash2, Plus, Download, ChevronDown, FileJson, FileText, List, Network, Check, X, Database } from "lucide-react" +import { Pencil, Trash2, Plus, Download, ChevronDown, FileJson, FileText, List, Network, Check, X, Database, AlertCircle } from "lucide-react" import { TripleEditor } from "./triple-editor" +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from "@/components/ui/alert-dialog" // Add this new EntityEditor component before the TripleViewer component interface EntityEditorProps { @@ -59,11 +69,16 @@ function EntityEditor({ entity, onSave, onCancel }: EntityEditorProps) { -
@@ -87,6 +102,12 @@ export function TripleViewer() { const [isDropdownOpen, setIsDropdownOpen] = useState(false) const [searchQuery, setSearchQuery] = useState('') const dropdownRef = useRef(null) + + // Delete confirmation dialog state + const [showDeleteTripleDialog, setShowDeleteTripleDialog] = useState(false) + const [tripleToDelete, setTripleToDelete] = useState<{ index: number, triple: Triple } | null>(null) + const [showDeleteEntityDialog, setShowDeleteEntityDialog] = useState(false) + const [entityToDelete, setEntityToDelete] = useState(null) // Handle click outside to close dropdown useEffect(() => { @@ -167,12 +188,19 @@ export function TripleViewer() { } const handleDeleteTriple = (index: number) => { - if (selectedDoc) { - if (confirm("Are you sure you want to delete this triple?")) { - deleteTriple(selectedDoc.id, index) - } + if (selectedDoc && selectedDoc.triples) { + setTripleToDelete({ index, triple: selectedDoc.triples[index] }) + setShowDeleteTripleDialog(true) } } + + const confirmDeleteTriple = () => { + if (selectedDoc && tripleToDelete !== null) { + deleteTriple(selectedDoc.id, tripleToDelete.index) + } + setShowDeleteTripleDialog(false) + setTripleToDelete(null) + } const exportTriplesCSV = () => { if (!selectedDoc || !selectedDoc.triples) return @@ -281,16 +309,22 @@ export function TripleViewer() { const handleDeleteEntity = (entity: string) => { if (!selectedDoc || !selectedDoc.triples) return; - - if (confirm(`Are you sure you want to delete the entity "${entity}"? 
This will remove all triples containing this entity.`)) { + setEntityToDelete(entity) + setShowDeleteEntityDialog(true) + }; + + const confirmDeleteEntity = () => { + if (selectedDoc && selectedDoc.triples && entityToDelete) { // Filter out all triples that contain the entity const filteredTriples = selectedDoc.triples.filter(triple => - triple.subject !== entity && triple.object !== entity + triple.subject !== entityToDelete && triple.object !== entityToDelete ); // Update the document with the filtered triples updateTriples(selectedDoc.id, filteredTriples); } + setShowDeleteEntityDialog(false) + setEntityToDelete(null) }; // Function to store triples in the Neo4j database @@ -383,8 +417,11 @@ export function TripleViewer() {
{isDropdownOpen && ( -
+
(
) } diff --git a/nvidia/txt2kg/assets/frontend/components/ui/progress.tsx b/nvidia/txt2kg/assets/frontend/components/ui/progress.tsx index ac465fb..20986ac 100644 --- a/nvidia/txt2kg/assets/frontend/components/ui/progress.tsx +++ b/nvidia/txt2kg/assets/frontend/components/ui/progress.tsx @@ -21,10 +21,15 @@ import * as ProgressPrimitive from "@radix-ui/react-progress" import { cn } from "@/lib/utils" +interface ProgressProps extends React.ComponentPropsWithoutRef { + /** Show shimmer animation overlay for visual polish */ + shimmer?: boolean +} + const Progress = React.forwardRef< React.ElementRef, - React.ComponentPropsWithoutRef ->(({ className, value, ...props }, ref) => ( + ProgressProps +>(({ className, value, shimmer = true, ...props }, ref) => ( 0 && (value ?? 0) < 100 && "progress-shimmer" + )} style={{ transform: `translateX(-${100 - (value || 0)}%)` }} /> diff --git a/nvidia/txt2kg/assets/frontend/components/ui/skeleton.tsx b/nvidia/txt2kg/assets/frontend/components/ui/skeleton.tsx index d3bc78b..6421695 100644 --- a/nvidia/txt2kg/assets/frontend/components/ui/skeleton.tsx +++ b/nvidia/txt2kg/assets/frontend/components/ui/skeleton.tsx @@ -16,13 +16,25 @@ // import { cn } from "@/lib/utils" +interface SkeletonProps extends React.HTMLAttributes { + /** Use directional shimmer instead of pulse animation */ + shimmer?: boolean +} + function Skeleton({ className, + shimmer = false, ...props -}: React.HTMLAttributes) { +}: SkeletonProps) { return (
) diff --git a/nvidia/txt2kg/assets/frontend/components/ui/switch.tsx b/nvidia/txt2kg/assets/frontend/components/ui/switch.tsx index b90cdbd..a721e1e 100644 --- a/nvidia/txt2kg/assets/frontend/components/ui/switch.tsx +++ b/nvidia/txt2kg/assets/frontend/components/ui/switch.tsx @@ -27,7 +27,7 @@ const Switch = React.forwardRef< >(({ className, ...props }, ref) => ( diff --git a/nvidia/txt2kg/assets/frontend/components/ui/tabs.tsx b/nvidia/txt2kg/assets/frontend/components/ui/tabs.tsx index 15d089d..ac229e6 100644 --- a/nvidia/txt2kg/assets/frontend/components/ui/tabs.tsx +++ b/nvidia/txt2kg/assets/frontend/components/ui/tabs.tsx @@ -60,7 +60,7 @@ const TabsContent = React.forwardRef< svg]:text-primary", }, }, defaultVariants: { diff --git a/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx b/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx index bbe728d..ad69d05 100644 --- a/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx +++ b/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx @@ -393,6 +393,11 @@ export function DocumentProvider({ children }: { children: React.ReactNode }) { requestBody.llmProvider = "ollama"; requestBody.ollamaModel = model.model || "llama3.1:8b"; console.log(`🦙 Using Ollama model: ${requestBody.ollamaModel}`); + } else if (model.provider === "vllm") { + requestBody.llmProvider = "vllm"; + requestBody.vllmModel = model.model; + requestBody.vllmBaseUrl = model.baseURL || "http://localhost:8001/v1"; + console.log(`🚀 Using vLLM model: ${requestBody.vllmModel}`); } else if (model.id === "nvidia-nemotron" || model.id === "nvidia-nemotron-nano") { requestBody.llmProvider = "nvidia"; requestBody.nvidiaModel = model.model; // Pass the actual model name diff --git a/nvidia/txt2kg/assets/frontend/lib/arangodb.ts b/nvidia/txt2kg/assets/frontend/lib/arangodb.ts index b038b21..2f6febf 100644 --- a/nvidia/txt2kg/assets/frontend/lib/arangodb.ts +++ b/nvidia/txt2kg/assets/frontend/lib/arangodb.ts @@ -15,6 +15,7 @@ // 
limitations under the License. // import { Database, aql } from 'arangojs'; +import { createHash } from 'crypto'; /** * ArangoDB service for database operations @@ -29,6 +30,36 @@ export class ArangoDBService { private constructor() {} + /** + * Generate a deterministic _key from input string using MD5 hash + * Uses Node.js built-in crypto module - truncated to 16 chars for compact keys + * @param input - String to hash + * @returns Hex-encoded hash string (16 chars, safe for ArangoDB _key) + */ + private generateKey(input: string): string { + return createHash('md5').update(input).digest('hex').slice(0, 16); + } + + /** + * Generate a deterministic _key for an entity based on its name + * @param name - Entity name + * @returns Deterministic _key string + */ + private generateEntityKey(name: string): string { + return this.generateKey(name.toLowerCase().trim()); + } + + /** + * Generate a deterministic _key for an edge based on its endpoints and type + * @param fromKey - Source entity _key + * @param toKey - Target entity _key + * @param relationType - Relationship type/predicate + * @returns Deterministic _key string + */ + private generateEdgeKey(fromKey: string, toKey: string, relationType: string): string { + return this.generateKey(`${fromKey}|${relationType.toLowerCase().trim()}|${toKey}`); + } + /** * Get the singleton instance of ArangoDBService */ @@ -77,9 +108,19 @@ export class ArangoDBService { if (!collectionNames.includes(this.collectionName)) { await this.db.createCollection(this.collectionName); await this.db.collection(this.collectionName).ensureIndex({ - type: 'persistent', + name: 'inverted_index', + type: 'inverted', fields: ['name'], - unique: true + analyzer: 'text_en' + }); + await this.db.createView(`${this.collectionName}_view`, { + type: 'search-alias', + indexes: [ + { + collection: this.collectionName, + index: 'inverted_index' + } + ] }); } @@ -87,19 +128,25 @@ export class ArangoDBService { if 
(!collectionNames.includes(this.edgeCollectionName)) { await this.db.createEdgeCollection(this.edgeCollectionName); await this.db.collection(this.edgeCollectionName).ensureIndex({ - type: 'persistent', - fields: ['type'] + name: 'inverted_index', + type: 'inverted', + fields: ['type'], + analyzer: 'text_en' + }); + await this.db.createView(`${this.edgeCollectionName}_view`, { + type: 'search-alias', + indexes: [ + { + collection: this.edgeCollectionName, + index: 'inverted_index' + } + ] }); } // Create documents collection if it doesn't exist if (!collectionNames.includes(this.documentsCollectionName)) { await this.db.createCollection(this.documentsCollectionName); - await this.db.collection(this.documentsCollectionName).ensureIndex({ - type: 'persistent', - fields: ['documentName'], - unique: true - }); } console.log('ArangoDB initialized successfully'); @@ -158,7 +205,8 @@ export class ArangoDBService { try { const collection = this.db.collection(this.collectionName); - return await collection.save(properties); + const doc = { ...properties, _key: this.generateEntityKey(properties.name) } + return await collection.save(doc, { overwriteMode: 'update' }); } catch (error) { console.error('Error creating node in ArangoDB:', error); throw error; @@ -186,12 +234,13 @@ export class ArangoDBService { try { const edgeCollection = this.db.collection(this.edgeCollectionName); const edgeData = { + _key: this.generateEdgeKey(fromKey, toKey, relationType), _from: `${this.collectionName}/${fromKey}`, _to: `${this.collectionName}/${toKey}`, type: relationType, ...properties }; - return await edgeCollection.save(edgeData); + return await edgeCollection.save(edgeData, { overwriteMode: 'update' }); } catch (error) { console.error('Error creating relationship in ArangoDB:', error); throw error; @@ -200,54 +249,69 @@ export class ArangoDBService { /** * Import triples (subject, predicate, object) into the graph database + * Batches inserts every 1000 documents by default * @param 
triples - Array of triples to import + * @param batchSize - Number of documents to insert per batch (default: 1000) * @returns Promise resolving when import is complete */ - public async importTriples(triples: { subject: string; predicate: string; object: string }[]): Promise { + public async importTriples( + triples: { subject: string; predicate: string; object: string }[], + batchSize: number = 1000 + ): Promise { if (!this.db) { throw new Error('ArangoDB connection not initialized. Call initialize() first.'); } + let entityBatch: Array<{ _key: string; name: string }> = []; + let edgeBatch: Array<{ _key: string; _from: string; _to: string; type: string }> = []; + + const importEntities = async () => { + if (entityBatch.length === 0) return; + await this.db!.collection(this.collectionName).saveAll(entityBatch, { overwriteMode: 'ignore' }); + console.log(`[ArangoDB] Imported ${entityBatch.length} entities`); + entityBatch = []; + }; + + const importEdges = async () => { + if (edgeBatch.length === 0) return; + await this.db!.collection(this.edgeCollectionName).saveAll(edgeBatch, { overwriteMode: 'ignore' }); + console.log(`[ArangoDB] Imported ${edgeBatch.length} edges`); + edgeBatch = []; + }; + try { - // Process triples in batches to improve performance for (const triple of triples) { - // Normalize triple values const normalizedSubject = triple.subject.trim(); const normalizedPredicate = triple.predicate.trim(); const normalizedObject = triple.object.trim(); - - // Skip invalid triples + if (!normalizedSubject || !normalizedPredicate || !normalizedObject) { console.warn('Skipping invalid triple:', triple); continue; } - - // Upsert subject and object nodes - const subjectNode = await this.upsertEntity(normalizedSubject); - const objectNode = await this.upsertEntity(normalizedObject); - - // Check if relationship already exists - const existingEdges = await this.executeQuery( - `FOR e IN ${this.edgeCollectionName} - FILTER e._from == @from AND e._to == @to AND 
e.type == @type - RETURN e`, - { - from: `${this.collectionName}/${subjectNode._key}`, - to: `${this.collectionName}/${objectNode._key}`, - type: normalizedPredicate - } - ); - - // Create relationship if it doesn't exist - if (existingEdges.length === 0) { - await this.createRelationship( - subjectNode._key, - objectNode._key, - normalizedPredicate - ); - } + + const subjectKey = this.generateEntityKey(normalizedSubject); + const objectKey = this.generateEntityKey(normalizedObject); + const edgeKey = this.generateEdgeKey(subjectKey, objectKey, normalizedPredicate); + + entityBatch.push({ _key: subjectKey, name: normalizedSubject }); + entityBatch.push({ _key: objectKey, name: normalizedObject }); + + edgeBatch.push({ + _key: edgeKey, + _from: `${this.collectionName}/${subjectKey}`, + _to: `${this.collectionName}/${objectKey}`, + type: normalizedPredicate + }); + + if (entityBatch.length >= batchSize) await importEntities(); + if (edgeBatch.length >= batchSize) await importEdges(); } - + + // Flush remaining + await importEntities(); + await importEdges(); + console.log(`Successfully imported ${triples.length} triples into ArangoDB`); } catch (error) { console.error('Error importing triples into ArangoDB:', error); @@ -255,28 +319,6 @@ export class ArangoDBService { } } - /** - * Helper method to upsert (create or update) an entity - * @param name - Entity name - * @returns Promise resolving to the entity - */ - private async upsertEntity(name: string): Promise { - const collection = this.db!.collection(this.collectionName); - - // Look for existing entity - const existing = await this.executeQuery( - `FOR e IN ${this.collectionName} FILTER e.name == @name RETURN e`, - { name } - ); - - if (existing.length > 0) { - return existing[0]; - } - - // Create new entity - return await collection.save({ name }); - } - /** * Check if a document has already been processed and stored in ArangoDB * @param documentName - Name of the document to check @@ -287,16 +329,9 @@ export 
class ArangoDBService { throw new Error('ArangoDB connection not initialized. Call initialize() first.'); } - try { - const existing = await this.executeQuery( - `FOR d IN ${this.documentsCollectionName} FILTER d.documentName == @documentName RETURN d`, - { documentName } - ); - return existing.length > 0; - } catch (error) { - console.error('Error checking if document is processed:', error); - return false; - } + const collection = this.db.collection(this.documentsCollectionName); + const key = this.generateKey(documentName.trim()); + return await collection.documentExists(key); } /** @@ -312,30 +347,18 @@ export class ArangoDBService { try { const collection = this.db.collection(this.documentsCollectionName); - await collection.save({ + const doc = { + _key: this.generateKey(documentName.trim()), documentName, tripleCount, processedAt: new Date().toISOString() - }); + }; + + await collection.save(doc, { overwriteMode: 'replace' }); console.log(`Marked document "${documentName}" as processed with ${tripleCount} triples`); } catch (error) { - // If error is due to unique constraint (document already exists), update it instead - if (error && typeof error === 'object' && 'errorNum' in error && error.errorNum === 1210) { - console.log(`Document "${documentName}" already exists, updating...`); - await this.executeQuery( - `FOR d IN ${this.documentsCollectionName} - FILTER d.documentName == @documentName - UPDATE d WITH { tripleCount: @tripleCount, processedAt: @processedAt } IN ${this.documentsCollectionName}`, - { - documentName, - tripleCount, - processedAt: new Date().toISOString() - } - ); - } else { - console.error('Error marking document as processed:', error); - throw error; - } + console.error('Error marking document as processed:', error); + throw error; } } @@ -363,19 +386,19 @@ export class ArangoDBService { * Get graph data in a format compatible with the existing application * @returns Promise resolving to nodes and relationships */ - public async 
getGraphData(): Promise<{ - nodes: Array<{ - id: string; - labels: string[]; - [key: string]: any - }>; - relationships: Array<{ - id: string; - source: string; - target: string; - type: string; - [key: string]: any - }>; + public async getGraphData(): Promise<{ + nodes: Array<{ + id: string; + labels: string[]; + [key: string]: any + }>; + relationships: Array<{ + id: string; + source: string; + target: string; + type: string; + [key: string]: any + }>; }> { if (!this.db) { throw new Error('ArangoDB connection not initialized. Call initialize() first.'); @@ -386,18 +409,12 @@ export class ArangoDBService { const entities = await this.executeQuery( `FOR e IN ${this.collectionName} RETURN e` ); - + // Get all relationships (edges) const relationships = await this.executeQuery( `FOR r IN ${this.edgeCollectionName} RETURN r` ); - - // Build id to key mapping for relationships - const idToKey = new Map(); - for (const entity of entities) { - idToKey.set(entity._id, entity._key); - } - + // Format nodes in a way compatible with the application const nodes = entities.map(entity => ({ id: entity._key, @@ -405,13 +422,12 @@ export class ArangoDBService { name: entity.name, ...entity })); - + // Format relationships in a way compatible with the application const formattedRelationships = relationships.map(rel => { - // Extract the entity keys from _from and _to const source = rel._from.split('/')[1]; const target = rel._to.split('/')[1]; - + return { id: rel._key, source, @@ -420,7 +436,7 @@ export class ArangoDBService { ...rel }; }); - + return { nodes, relationships: formattedRelationships @@ -435,7 +451,7 @@ export class ArangoDBService { * Log query information and metrics */ public async logQuery( - query: string, + query: string, queryMode: 'traditional' | 'vector-search' | 'pure-rag', metrics: { executionTimeMs: number; @@ -453,11 +469,11 @@ export class ArangoDBService { // Create a queryLogs collection if it doesn't exist const collections = await 
this.db.listCollections(); const collectionNames = collections.map(c => c.name); - + if (!collectionNames.includes('queryLogs')) { await this.db.createCollection('queryLogs'); } - + // Store query log const queryLog = { query, @@ -465,7 +481,7 @@ export class ArangoDBService { metrics, timestamp: new Date().toISOString() }; - + await this.db.collection('queryLogs').save(queryLog); } catch (error) { console.error('Error logging query to ArangoDB:', error); @@ -488,17 +504,17 @@ export class ArangoDBService { // Check if queryLogs collection exists const collections = await this.db.listCollections(); const collectionNames = collections.map(c => c.name); - + if (!collectionNames.includes('queryLogs')) { return []; } - + // Get logs sorted by timestamp const logs = await this.executeQuery( `FOR l IN queryLogs SORT l.timestamp DESC LIMIT @limit RETURN l`, { limit } ); - + return logs; } catch (error) { console.error('Error getting query logs from ArangoDB:', error); @@ -507,16 +523,19 @@ export class ArangoDBService { } /** - * Perform graph traversal to find relevant triples using ArangoDB's native graph capabilities + * Perform graph traversal to find relevant triples using ArangoDB's native text search and graph capabilities + * Uses inverted indexes with BM25 scoring for efficient keyword matching * @param keywords - Array of keywords to search for * @param maxDepth - Maximum traversal depth (default: 2) * @param maxResults - Maximum number of results to return (default: 100) + * @param maxSeeds - Maximum number of seed nodes/edges from text search (default: 50) * @returns Promise resolving to array of triples with relevance scores */ public async graphTraversal( keywords: string[], maxDepth: number = 2, - maxResults: number = 100 + maxResults: number = 100, + maxSeeds: number = 50 ): Promise 0 + FOR vertex IN ${this.collectionName}_view + SEARCH ANALYZER(vertex.name IN keywords_tokens, "text_en") + LET score = BM25(vertex) + SORT score DESC + LIMIT @maxSeeds + 
+            RETURN { vertex, score }
+          )
+
+          // 3. Match for relationship.type
+          LET seedEdges = (
+            FOR edge IN ${this.edgeCollectionName}_view
+              SEARCH ANALYZER(edge.type IN keywords_tokens, "text_en")
+              LET score = BM25(edge)
+              SORT score DESC
+              LIMIT @maxSeeds
+              RETURN { edge, score }
+          )
+
+          // 4. Normalize scores
+          LET maxNodeScore = MAX(seedNodes[*].score) || 1
+          LET maxEdgeScore = MAX(seedEdges[*].score) || 1
+
+          // 5. Traverse from seedNodes up to maxDepth
+          LET traversalResults = (
+            FOR seed IN seedNodes
+              FOR v, e, p IN 1..@maxDepth ANY seed.vertex ${this.edgeCollectionName}
+                OPTIONS { uniqueVertices: 'path', bfs: true }
+
+                LET subjectEntity = DOCUMENT(e._from)
+                LET objectEntity = DOCUMENT(e._to)
+                LET depth = LENGTH(p.edges) - 1
+
+                // Depth penalty: closer to seed = higher score
+                LET depthPenalty = 1.0 / (1.0 + depth * 0.2)
+
+                // Normalize seed score and apply depth penalty
+                LET normalizedSeedScore = seed.score / maxNodeScore
+                LET confidence = normalizedSeedScore * depthPenalty
+
+                RETURN {
+                  subject: subjectEntity.name,
+                  predicate: e.type,
+                  object: objectEntity.name,
+                  confidence: confidence,
+                  depth: depth,
+                  _edgeId: e._id,
+                  pathLength: LENGTH(p.edges)
+                }
+          )
+
+          // 6. Collect triples from seedEdges (direct hits)
+          LET edgeResults = (
+            FOR seed IN seedEdges
+              LET subjectEntity = DOCUMENT(seed.edge._from)
+              LET objectEntity = DOCUMENT(seed.edge._to)
+
+              // Direct edge matches get a boost (depth 0)
+              LET normalizedScore = seed.score / maxEdgeScore
+              RETURN {
-                node: entity,
-                matchCount: LENGTH(matches)
+                subject: subjectEntity.name,
+                predicate: seed.edge.type,
+                object: objectEntity.name,
+                confidence: normalizedScore * 1.2, // Boost direct edge matches
+                depth: 0,
+                _edgeId: seed.edge._id,
+                pathLength: 1
               }
           )
 
-        // Perform graph traversal from seed nodes
-        // Multi-hop: Extract ALL edges in each path, not just the final edge
-        LET traversalResults = (
-          FOR seed IN seedNodes
-            FOR v, e, p IN 0..@maxDepth ANY seed.node._id ${this.edgeCollectionName}
-            OPTIONS {uniqueVertices: 'global', bfs: true}
-            FILTER e != null
+          // 7. Combine traversalResults and edgeResults
+          LET combinedResults = APPEND(traversalResults, edgeResults)
 
-            // Extract all edges from the path for multi-hop context
-            LET pathEdges = (
-              FOR edgeIdx IN 0..(LENGTH(p.edges) - 1)
-                LET pathEdge = p.edges[edgeIdx]
-                LET subjectEntity = DOCUMENT(pathEdge._from)
-                LET objectEntity = DOCUMENT(pathEdge._to)
-                LET subjectLower = LOWER(subjectEntity.name)
-                LET objectLower = LOWER(objectEntity.name)
-                LET predicateLower = LOWER(pathEdge.type)
-
-                // Calculate score for this edge
-                LET subjectMatches = (
-                  FOR kw IN @keywords
-                    FILTER CONTAINS(subjectLower, kw)
-                    LET isExact = (subjectLower == kw)
-                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
-                )
-                LET objectMatches = (
-                  FOR kw IN @keywords
-                    FILTER CONTAINS(objectLower, kw)
-                    LET isExact = (objectLower == kw)
-                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
-                )
-                LET predicateMatches = (
-                  FOR kw IN @keywords
-                    FILTER CONTAINS(predicateLower, kw)
-                    LET isExact = (predicateLower == kw)
-                    RETURN isExact ? 50 : (LENGTH(kw) * LENGTH(kw))
-                )
-
-                LET totalScore = SUM(subjectMatches) + SUM(objectMatches) + SUM(predicateMatches)
-
-                // Depth penalty (edges earlier in path get slight boost)
-                LET depthPenalty = 1.0 / (1.0 + (edgeIdx * 0.1))
-
-                LET confidence = MIN([totalScore * depthPenalty / 1000.0, 1.0])
-
-                FILTER confidence > 0
-
-                RETURN {
-                  subject: subjectEntity.name,
-                  predicate: pathEdge.type,
-                  object: objectEntity.name,
-                  confidence: confidence,
-                  depth: edgeIdx,
-                  _edgeId: pathEdge._id,
-                  pathLength: LENGTH(p.edges)
-                }
-            )
-
-            // Return all edges from this path
-            FOR pathTriple IN pathEdges
-              RETURN pathTriple
-        )
-
-        // Remove duplicates by edge ID and sort by confidence
+          // 8. Remove duplicates by edge ID and sort by confidence
         LET uniqueResults = (
-          FOR result IN traversalResults
+          FOR result IN combinedResults
             COLLECT edgeId = result._edgeId INTO groups
             LET best = FIRST(
               FOR g IN groups
@@ -636,8 +651,9 @@ export class ArangoDBService {
             RETURN best
         )
 
-        // Sort by confidence and limit results
+        // 9. Sort by confidence and limit results
         FOR result IN uniqueResults
+          FILTER result != null
           SORT result.confidence DESC, result.depth ASC
           LIMIT @maxResults
           RETURN {
@@ -655,14 +671,15 @@ export class ArangoDBService {
     const results = await this.executeQuery(query, {
       keywords: keywordConditions,
       maxDepth,
-      maxResults
+      maxResults,
+      maxSeeds
     });
 
-    console.log(`[ArangoDB] Multi-hop graph traversal found ${results.length} triples for keywords: ${keywords.join(', ')}`);
+    console.log(`[ArangoDB] Found ${results.length} triples for keywords: ${keywords.join(', ')}`);
 
     // Log top 10 results with confidence scores
     if (results.length > 0) {
-      console.log('[ArangoDB] Top 10 triples by confidence (multi-hop):');
+      console.log('[ArangoDB] Top 10 triples by confidence:');
       results.slice(0, 10).forEach((triple: any, idx: number) => {
         const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
         console.log(`  ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
@@ -705,22 +722,22 @@ export class ArangoDBService {
     try {
       // Truncate the entities collection (nodes)
       await this.db.collection(this.collectionName).truncate();
-      
+
       // Truncate the relationships collection (edges)
       await this.db.collection(this.edgeCollectionName).truncate();
-      
+
       // Also clear query logs if they exist
       const collections = await this.db.listCollections();
       const collectionNames = collections.map(c => c.name);
-      
+
       if (collectionNames.includes('queryLogs')) {
        await this.db.collection('queryLogs').truncate();
       }
-      
+
       console.log('ArangoDB database cleared successfully');
     } catch (error) {
       console.error('Error clearing ArangoDB database:', error);
       throw error;
     }
   }
-}
\ No newline at end of file
+}
diff --git a/nvidia/txt2kg/assets/frontend/lib/backend-service.ts b/nvidia/txt2kg/assets/frontend/lib/backend-service.ts
index b2b34d1..7044490 100644
--- a/nvidia/txt2kg/assets/frontend/lib/backend-service.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/backend-service.ts
@@ -32,16 +32,24 @@ import type { Triple } from '@/types/graph';
  */
 export class BackendService {
   private graphDBService: GraphDBService;
-  private pineconeService: QdrantService;
+  private qdrantService: QdrantService;
   private sentenceTransformerUrl: string = 'http://sentence-transformers:80';
   private modelName: string = 'all-MiniLM-L6-v2';
   private static instance: BackendService;
   private initialized: boolean = false;
-  private activeGraphDbType: GraphDBType = 'arangodb';
+  private activeGraphDbType: GraphDBType | null = null; // Set at runtime, not build time
+
+  private getRuntimeGraphDbType(): GraphDBType {
+    if (this.activeGraphDbType === null) {
+      this.activeGraphDbType = (process.env.GRAPH_DB_TYPE as GraphDBType) || 'arangodb';
+      console.log(`[BackendService] Initialized activeGraphDbType at runtime: ${this.activeGraphDbType}`);
+    }
+    return this.activeGraphDbType;
+  }
 
   private constructor() {
     this.graphDBService = GraphDBService.getInstance();
-    this.pineconeService = QdrantService.getInstance();
+    this.qdrantService = QdrantService.getInstance();
 
     // Use environment variables if available
     if (process.env.SENTENCE_TRANSFORMER_URL) {
@@ -64,16 +72,17 @@ export class BackendService {
 
   /**
    * Initialize the backend services
-   * @param graphDbType - Type of graph database to use (neo4j or arangodb)
+   * @param graphDbType - Type of graph database to use (defaults to GRAPH_DB_TYPE env var)
    */
-  public async initialize(graphDbType: GraphDBType = 'arangodb'): Promise<void> {
-    this.activeGraphDbType = graphDbType;
+  public async initialize(graphDbType?: GraphDBType): Promise<void> {
+    const dbType = graphDbType || (process.env.GRAPH_DB_TYPE as GraphDBType) || 'arangodb';
+    this.activeGraphDbType = dbType;
 
     // Initialize Graph Database
     if (!this.graphDBService.isInitialized()) {
       try {
         // Get the appropriate service based on type
-        const graphDbService = getGraphDbService(graphDbType);
+        const graphDbService = getGraphDbService(dbType);
 
         // Try to get settings from server settings API first
         let serverSettings: Record<string, any> = {};
@@ -88,7 +97,7 @@ export class BackendService {
           console.log('Failed to load settings from server API, falling back to environment variables:', error);
         }
 
-        if (graphDbType === 'neo4j') {
+        if (dbType === 'neo4j') {
           // Get Neo4j credentials from server settings first, then fallback to environment
           const uri = serverSettings.neo4j_url || process.env.NEO4J_URI;
           const username = serverSettings.neo4j_user || process.env.NEO4J_USER || process.env.NEO4J_USERNAME;
@@ -107,9 +116,9 @@ export class BackendService {
           console.log(`Using ArangoDB database: ${dbName}`);
           await this.graphDBService.initialize('arangodb', url, username, password);
         }
-        console.log(`${graphDbType} initialized successfully in backend service`);
+        console.log(`${dbType} initialized successfully in backend service`);
       } catch (error) {
-        console.error(`Failed to initialize ${graphDbType} in backend service:`, error);
+        console.error(`Failed to initialize ${dbType} in backend service:`, error);
         if (process.env.NODE_ENV === 'development') {
           console.log('Development mode: Continuing despite graph database initialization error');
         } else {
@@ -118,9 +127,9 @@
       }
     }
 
-    // Initialize Pinecone
-    if (!this.pineconeService.isInitialized()) {
-      await this.pineconeService.initialize();
+    // Initialize Qdrant
+    if (!this.qdrantService.isInitialized()) {
+      await this.qdrantService.initialize();
     }
 
     // Check if sentence-transformer service is available
@@ -151,7 +160,7 @@ export class BackendService {
    * Get the active graph database type
    */
   public getGraphDbType(): GraphDBType {
-    return this.activeGraphDbType;
+    return this.getRuntimeGraphDbType();
   }
 
   /**
@@ -183,7 +192,7 @@ export class BackendService {
   }
 
   /**
-   * Process and store triples in graph database and embeddings in Pinecone
+   * Process and store triples in graph database and embeddings in Qdrant
    */
   public async processTriples(triples: Triple[]): Promise<void> {
     // Preprocess triples: lowercase and remove duplicates
@@ -232,8 +241,8 @@ export class BackendService {
       }
     }
 
-    // Store embeddings and text content in Pinecone
-    await this.pineconeService.storeEmbeddings(entityEmbeddings, textContent);
+    // Store embeddings and text content in Qdrant
+    await this.qdrantService.storeEmbeddings(entityEmbeddings, textContent);
 
     console.log(`Backend processing complete: ${uniqueTriples.length} triples and ${entityList.length} entities stored using ${this.activeGraphDbType}`);
   }
@@ -253,7 +262,7 @@ export class BackendService {
     const filteredKeywords = keywords.filter(kw => !this.isStopWord(kw));
 
     // If using ArangoDB, use its native graph traversal capabilities
-    if (this.activeGraphDbType === 'arangodb') {
+    if (this.getRuntimeGraphDbType() === 'arangodb') {
       console.log(`Using ArangoDB native graph traversal for keywords: ${filteredKeywords.join(', ')}`);
       try {
@@ -392,8 +401,8 @@ export class BackendService {
     // Generate embedding for query
     const queryEmbedding = (await this.generateEmbeddings([queryText]))[0];
 
-    // Find nearest neighbors using Pinecone
-    const seedNodes = await this.pineconeService.findSimilarEntities(queryEmbedding, kNeighbors);
+    // Find nearest neighbors using Qdrant
+    const seedNodes = await this.qdrantService.findSimilarEntities(queryEmbedding, kNeighbors);
     console.log(`Found ${seedNodes.length} seed nodes for query: "${queryText}"`);
 
     // Get graph data from graph database
@@ -649,7 +658,7 @@ Answer:`;
     const embeddings = await this.generateEmbeddings(documents);
 
     // Store in Qdrant document-embeddings collection
-    await this.pineconeService.storeDocumentChunks(documents, embeddings, metadata);
+    await this.qdrantService.storeDocumentChunks(documents, embeddings, metadata);
 
     console.log(`✅ Stored ${documents.length} document chunks in document-embeddings collection`);
   }
diff --git a/nvidia/txt2kg/assets/frontend/lib/client-init.ts b/nvidia/txt2kg/assets/frontend/lib/client-init.ts
index 1ef1d35..ad067c0 100644
--- a/nvidia/txt2kg/assets/frontend/lib/client-init.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/client-init.ts
@@ -22,18 +22,17 @@
 /**
  * Initialize default database settings if not already set
  * Called before syncing with server to ensure defaults are available
+ * NOTE: Don't set graph_db_type here - let server's GRAPH_DB_TYPE env var control it
  */
 export function initializeDefaultSettings() {
   if (typeof window === 'undefined') {
     return; // Only run on client side
   }
 
-  // Set default graph DB type to ArangoDB if not set
-  if (!localStorage.getItem('graph_db_type')) {
-    localStorage.setItem('graph_db_type', 'arangodb');
-  }
-
-  // Set default ArangoDB settings if not set
+  // Don't set graph_db_type default - let it be controlled by server's GRAPH_DB_TYPE env var
+  // The server will use its environment variable if no client setting is provided
+
+  // Set default connection settings only (not the database type selection)
   if (!localStorage.getItem('arango_url')) {
     localStorage.setItem('arango_url', 'http://localhost:8529');
   }
@@ -41,6 +40,11 @@ export function initializeDefaultSettings() {
   if (!localStorage.getItem('arango_db')) {
     localStorage.setItem('arango_db', 'txt2kg');
   }
+
+  // Set default Neo4j settings
+  if (!localStorage.getItem('neo4j_url')) {
+    localStorage.setItem('neo4j_url', 'bolt://localhost:7687');
+  }
 }
 
 /**
@@ -124,21 +128,6 @@ export async function syncSettingsWithServer() {
     settings.NVIDIA_API_KEY = nvidiaApiKey;
   }
 
-  // Pinecone settings
-  const pineconeApiKey = localStorage.getItem('pinecone_api_key');
-  if (pineconeApiKey) {
-    settings.pinecone_api_key = pineconeApiKey;
-  }
-
-  const pineconeEnvironment = localStorage.getItem('pinecone_environment');
-  if (pineconeEnvironment) {
-    settings.pinecone_environment = pineconeEnvironment;
-  }
-
-  const pineconeIndex = localStorage.getItem('pinecone_index');
-  if (pineconeIndex) {
-    settings.pinecone_index = pineconeIndex;
-  }
 
   // Skip the API call if there are no settings to sync
   if (Object.keys(settings).length === 0) {
diff --git a/nvidia/txt2kg/assets/frontend/lib/graph-db-service.ts b/nvidia/txt2kg/assets/frontend/lib/graph-db-service.ts
index 8a0f839..2a1201b 100644
--- a/nvidia/txt2kg/assets/frontend/lib/graph-db-service.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/graph-db-service.ts
@@ -26,7 +26,7 @@ export type GraphDBType = 'neo4j' | 'arangodb';
 export class GraphDBService {
   private neo4jService: Neo4jService;
   private arangoDBService: ArangoDBService;
-  private activeDBType: GraphDBType = 'arangodb'; // Default to ArangoDB
+  private activeDBType: GraphDBType | null = null; // Set at runtime, not build time
   private static instance: GraphDBService;
 
   private constructor() {
@@ -34,6 +34,17 @@ export class GraphDBService {
     this.arangoDBService = ArangoDBService.getInstance();
   }
 
+  /**
+   * Get the active DB type, reading from env at runtime if not set
+   */
+  private getActiveDBType(): GraphDBType {
+    if (this.activeDBType === null) {
+      this.activeDBType = (process.env.GRAPH_DB_TYPE as GraphDBType) || 'arangodb';
+      console.log(`[GraphDBService] Initialized activeDBType at runtime: ${this.activeDBType}`);
+    }
+    return this.activeDBType;
+  }
+
   /**
    * Get the singleton instance of GraphDBService
    */
@@ -46,24 +57,25 @@ export class GraphDBService {
 
   /**
    * Initialize the graph database with the specified type
-   * @param dbType - Type of graph database to use
+   * @param dbType - Type of graph database to use (defaults to GRAPH_DB_TYPE env var)
    * @param uri - Connection URL
    * @param username - Database username
    * @param password - Database password
    */
-  public async initialize(dbType: GraphDBType = 'arangodb', uri?: string, username?: string, password?: string): Promise<void> {
-    this.activeDBType = dbType;
+  public async initialize(dbType?: GraphDBType, uri?: string, username?: string, password?: string): Promise<void> {
+    const graphDbType = dbType || (process.env.GRAPH_DB_TYPE as GraphDBType) || 'arangodb';
+    this.activeDBType = graphDbType;
 
     try {
-      if (dbType === 'neo4j') {
+      if (graphDbType === 'neo4j') {
         this.neo4jService.initialize(uri, username, password);
         console.log('Neo4j initialized successfully');
-      } else if (dbType === 'arangodb') {
+      } else if (graphDbType === 'arangodb') {
         await this.arangoDBService.initialize(uri, undefined, username, password);
         console.log('ArangoDB initialized successfully');
       }
     } catch (error) {
-      console.error(`Failed to initialize ${dbType}:`, error);
+      console.error(`Failed to initialize ${graphDbType}:`, error);
       throw error;
     }
   }
@@ -79,14 +91,14 @@ export class GraphDBService {
    * Get the active graph database type
    */
   public getDBType(): GraphDBType {
-    return this.activeDBType;
+    return this.getActiveDBType();
   }
 
   /**
    * Check if the active database is initialized
   */
   public isInitialized(): boolean {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       return this.neo4jService.isInitialized();
     } else {
       return this.arangoDBService.isInitialized();
@@ -97,7 +109,7 @@
    * Import triples into the active graph database
    */
   public async importTriples(triples: { subject: string; predicate: string; object: string }[]): Promise<void> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       await this.neo4jService.importTriples(triples);
     } else {
       await this.arangoDBService.importTriples(triples);
@@ -121,7 +133,7 @@
       [key: string]: any
     }>;
   }> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       return await this.neo4jService.getGraphData();
     } else {
       return await this.arangoDBService.getGraphData();
@@ -142,7 +154,7 @@
       resultCount: number;
     }
   ): Promise<void> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       await this.neo4jService.logQuery(query, queryMode, metrics);
     } else {
       await this.arangoDBService.logQuery(query, queryMode, metrics);
@@ -153,7 +165,7 @@
    * Get query logs from the active graph database
    */
   public async getQueryLogs(limit: number = 100): Promise<any[]> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       return await this.neo4jService.getQueryLogs(limit);
     } else {
       return await this.arangoDBService.getQueryLogs(limit);
@@ -164,7 +176,7 @@
    * Close the connection to the active graph database
    */
   public async close(): Promise<void> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       this.neo4jService.close();
     } else {
       this.arangoDBService.close();
@@ -175,7 +187,7 @@
    * Get info about the active graph database driver
    */
   public getDriverInfo(): Record<string, any> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       return this.neo4jService.getDriverInfo();
     } else {
       return this.arangoDBService.getDriverInfo();
@@ -197,7 +209,7 @@
     confidence: number;
     depth?: number;
   }>> {
-    if (this.activeDBType === 'arangodb') {
+    if (this.getActiveDBType() === 'arangodb') {
       return await this.arangoDBService.graphTraversal(keywords, maxDepth, maxResults);
     } else {
       // Neo4j doesn't have this method yet, return empty array
@@ -210,7 +222,7 @@
    * Clear all data from the active graph database
    */
   public async clearDatabase(): Promise<void> {
-    if (this.activeDBType === 'neo4j') {
+    if (this.getActiveDBType() === 'neo4j') {
       // TODO: Implement Neo4j clear database functionality
       throw new Error('Clear database functionality not implemented for Neo4j');
     } else {
diff --git a/nvidia/txt2kg/assets/frontend/lib/graph-db-util.ts b/nvidia/txt2kg/assets/frontend/lib/graph-db-util.ts
index 84752d1..c4c6dc7 100644
--- a/nvidia/txt2kg/assets/frontend/lib/graph-db-util.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/graph-db-util.ts
@@ -18,20 +18,34 @@ import { GraphDBService, GraphDBType } from './graph-db-service';
 import { Neo4jService } from './neo4j';
 import { ArangoDBService } from './arangodb';
 
+/**
+ * Get the default graph database type from environment or fallback to arangodb
+ * Note: This is called at runtime, not build time, so process.env should be available
+ */
+function getDefaultGraphDbType(): GraphDBType {
+  const envType = process.env.GRAPH_DB_TYPE;
+  console.log(`[graph-db-util] getDefaultGraphDbType: env=${envType}`);
+  return (envType as GraphDBType) || 'arangodb';
+}
+
 /**
  * Get the appropriate graph database service based on the graph database type.
  * This is useful for API routes that need direct access to a specific graph database.
  *
- * @param graphDbType - The type of graph database to use
+ * @param graphDbType - The type of graph database to use (defaults to GRAPH_DB_TYPE env var)
  */
-export function getGraphDbService(graphDbType: GraphDBType = 'arangodb') {
-  if (graphDbType === 'neo4j') {
+export function getGraphDbService(graphDbType?: GraphDBType) {
+  const dbType = graphDbType || getDefaultGraphDbType();
+
+  if (dbType === 'neo4j') {
     return Neo4jService.getInstance();
-  } else if (graphDbType === 'arangodb') {
+  } else if (dbType === 'arangodb') {
     return ArangoDBService.getInstance();
   } else {
-    // Default to ArangoDB
-    return ArangoDBService.getInstance();
+    // Default based on environment
+    return getDefaultGraphDbType() === 'neo4j'
+      ? Neo4jService.getInstance()
+      : ArangoDBService.getInstance();
   }
 }
 
@@ -39,12 +53,13 @@ export function getGraphDbService(graphDbType: GraphDBType = 'arangodb') {
  * Initialize the graph database directly (not using GraphDBService).
  * This is useful for API routes that need direct access to a specific graph database.
  *
- * @param graphDbType - The type of graph database to use
+ * @param graphDbType - The type of graph database to use (defaults to GRAPH_DB_TYPE env var)
  */
-export async function initializeGraphDb(graphDbType: GraphDBType = 'arangodb'): Promise<void> {
-  const service = getGraphDbService(graphDbType);
+export async function initializeGraphDb(graphDbType?: GraphDBType): Promise<void> {
+  const dbType = graphDbType || getDefaultGraphDbType();
+  const service = getGraphDbService(dbType);
 
-  if (graphDbType === 'neo4j') {
+  if (dbType === 'neo4j') {
     // Get Neo4j credentials from environment
     const uri = process.env.NEO4J_URI;
     const username = process.env.NEO4J_USER || process.env.NEO4J_USERNAME;
@@ -54,7 +69,7 @@ export async function initializeGraphDb(graphDbType: GraphDBType = 'arangodb'):
     if (service instanceof Neo4jService) {
       service.initialize(uri, username, password);
     }
-  } else if (graphDbType === 'arangodb') {
+  } else if (dbType === 'arangodb') {
     // Get ArangoDB credentials from environment
     const url = process.env.ARANGODB_URL;
     const dbName = process.env.ARANGODB_DB;
diff --git a/nvidia/txt2kg/assets/frontend/lib/pinecone.ts b/nvidia/txt2kg/assets/frontend/lib/pinecone.ts
index 23b3a2d..b7b4777 100644
--- a/nvidia/txt2kg/assets/frontend/lib/pinecone.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/pinecone.ts
@@ -1,19 +1,3 @@
-//
-// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
 /**
  * Pinecone service for vector embeddings
  * Uses direct API calls for Pinecone local server
diff --git a/nvidia/txt2kg/assets/frontend/lib/qdrant.ts b/nvidia/txt2kg/assets/frontend/lib/qdrant.ts
index 48fe140..cef0ab2 100644
--- a/nvidia/txt2kg/assets/frontend/lib/qdrant.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/qdrant.ts
@@ -16,7 +16,6 @@
 //
 /**
  * Qdrant service for vector embeddings
- * Drop-in replacement for PineconeService
  */
 import { Document } from "@langchain/core/documents";
 import { randomUUID } from "crypto";
@@ -477,7 +476,7 @@ export class QdrantService {
     }
 
     try {
-      // Qdrant doesn't have a direct "get all" like Pinecone
+      // Use scroll API to get points
       // We'll use scroll API to get points
       const response = await this.makeRequest(`/collections/${this.collectionName}/points/scroll`, 'POST', {
         limit: limit,
diff --git a/nvidia/txt2kg/assets/frontend/lib/remote-backend.ts b/nvidia/txt2kg/assets/frontend/lib/remote-backend.ts
index bf9618e..4df6f2d 100644
--- a/nvidia/txt2kg/assets/frontend/lib/remote-backend.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/remote-backend.ts
@@ -28,7 +28,7 @@ import type { Triple } from '@/types/graph';
  */
 export class RemoteBackendService {
   private graphDBService: GraphDBService;
-  private pineconeService: QdrantService;
+  private qdrantService: QdrantService;
   private embeddingsService: EmbeddingsService;
   private textProcessor: TextProcessor;
   private initialized: boolean = false;
@@ -36,7 +36,7 @@ export class RemoteBackendService {
 
   private constructor() {
     this.graphDBService = GraphDBService.getInstance();
-    this.pineconeService = QdrantService.getInstance();
+    this.qdrantService = QdrantService.getInstance();
     this.embeddingsService = EmbeddingsService.getInstance();
     this.textProcessor = TextProcessor.getInstance();
   }
@@ -60,18 +60,19 @@ export class RemoteBackendService {
 
   /**
    * Initialize the remote backend with all required services
-   * @param graphDbType - Type of graph database to use
+   * @param graphDbType - Type of graph database to use (defaults to GRAPH_DB_TYPE env var)
    */
-  public async initialize(graphDbType: GraphDBType = 'arangodb'): Promise<void> {
-    console.log('Initializing remote backend...');
+  public async initialize(graphDbType?: GraphDBType): Promise<void> {
+    const dbType = graphDbType || (process.env.GRAPH_DB_TYPE as GraphDBType) || 'arangodb';
+    console.log(`Initializing remote backend with ${dbType}...`);
 
     // Initialize Graph Database
-    await this.graphDBService.initialize(graphDbType);
-    console.log(`${graphDbType} service initialized`);
+    await this.graphDBService.initialize(dbType);
+    console.log(`${dbType} service initialized`);
 
-    // Initialize Pinecone
-    await this.pineconeService.initialize();
-    console.log('Pinecone service initialized');
+    // Initialize Qdrant
+    await this.qdrantService.initialize();
+    console.log('Qdrant service initialized');
 
     // Initialize Embeddings service
     await this.embeddingsService.initialize();
@@ -179,9 +180,9 @@ export class RemoteBackendService {
       entityMetadata.set(entity, entityData);
     }
 
-    // Store embeddings and metadata in Pinecone
-    await this.pineconeService.storeEmbeddingsWithMetadata(entityEmbeddings, textContent, entityMetadata);
-    console.log('Stored embeddings with metadata in Pinecone');
+    // Store embeddings and metadata in Qdrant
+    await this.qdrantService.storeEmbeddingsWithMetadata(entityEmbeddings, textContent, entityMetadata);
+    console.log('Stored embeddings with metadata in Qdrant');
 
     console.log('Backend created successfully from text');
   }
@@ -224,9 +225,9 @@ export class RemoteBackendService {
       });
     }
 
-    // Store embeddings and metadata in Pinecone
-    await this.pineconeService.storeEmbeddingsWithMetadata(entityEmbeddings, textContent, entityMetadata);
-    console.log('Stored embeddings with metadata in Pinecone');
+    // Store embeddings and metadata in Qdrant
+    await this.qdrantService.storeEmbeddingsWithMetadata(entityEmbeddings, textContent, entityMetadata);
+    console.log('Stored embeddings with metadata in Qdrant');
 
     console.log('Backend created successfully from triples');
   }
@@ -287,8 +288,8 @@ export class RemoteBackendService {
     // Step 1: Generate embedding for query
     const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
 
-    // Step 2: Find nearest neighbors using Pinecone
-    const seedNodes = await this.pineconeService.findSimilarEntities(queryEmbedding, kNeighbors);
+    // Step 2: Find nearest neighbors using Qdrant
+    const seedNodes = await this.qdrantService.findSimilarEntities(queryEmbedding, kNeighbors);
     console.log(`Found ${seedNodes.length} seed nodes using KNN`);
 
     // Step 3: Retrieve graph data from graph database
@@ -552,9 +553,9 @@ export class RemoteBackendService {
     // Step 1: Generate embedding for query
     const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
 
-    // Step 2: Find nearest neighbors using Pinecone with metadata
+    // Step 2: Find nearest neighbors using Qdrant with metadata
     const { entities: seedNodes, metadata: seedMetadata } =
-      await this.pineconeService.findSimilarEntitiesWithMetadata(queryEmbedding, kNeighbors);
+      await this.qdrantService.findSimilarEntitiesWithMetadata(queryEmbedding, kNeighbors);
     console.log(`Found ${seedNodes.length} seed nodes using KNN with metadata`);
 
     // Step 3: Retrieve graph data from graph database
diff --git a/nvidia/txt2kg/assets/frontend/lib/text-processor.ts b/nvidia/txt2kg/assets/frontend/lib/text-processor.ts
index 6278c2e..21c8cab 100644
--- a/nvidia/txt2kg/assets/frontend/lib/text-processor.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/text-processor.ts
@@ -376,7 +376,7 @@ ${formatInstructions}`;
           }
         ],
         temperature: 0.1,
-        max_tokens: 8192,
+        max_tokens: 4096, // Reduced to leave room for input tokens in context
         top_p: 0.95
       })
     });
diff --git a/nvidia/txt2kg/assets/frontend/package.json b/nvidia/txt2kg/assets/frontend/package.json
index f64e9d8..6d107f3 100644
--- a/nvidia/txt2kg/assets/frontend/package.json
+++ b/nvidia/txt2kg/assets/frontend/package.json
@@ -3,13 +3,10 @@
   "version": "0.1.0",
   "private": true,
   "scripts": {
-    "predev": "npm run setup-pinecone",
     "dev": "next dev",
-    "prebuild": "npm run setup-pinecone",
     "build": "next build",
     "start": "next start",
-    "lint": "next lint",
-    "setup-pinecone": "node ../scripts/setup-pinecone.js"
+    "lint": "next lint"
   },
   "dependencies": {
     "3d-force-graph": "^1.77.0",
diff --git a/nvidia/txt2kg/assets/frontend/styles/nvidia-build-typography.css b/nvidia/txt2kg/assets/frontend/styles/nvidia-build-typography.css
index 4214066..df7d983 100644
--- a/nvidia/txt2kg/assets/frontend/styles/nvidia-build-typography.css
+++ b/nvidia/txt2kg/assets/frontend/styles/nvidia-build-typography.css
@@ -162,6 +162,26 @@
   @apply w-5 h-5 rounded-md bg-nvidia-green/15 flex items-center justify-center transition-transform duration-200;
 }
 
+/* Tab content wrapper for max-width */
+.nvidia-build-tab-content {
+  @apply w-full max-w-7xl mx-auto;
+}
+
+/* Responsive tab layout */
+@media (max-width: 768px) {
+  .nvidia-build-tabs {
+    @apply flex-col w-full p-1.5 gap-1;
+  }
+
+  .nvidia-build-tab {
+    @apply w-full justify-start px-4 py-2.5;
+  }
+
+  .nvidia-build-tab-icon {
+    @apply w-5 h-5;
+  }
+}
+
 /* Dark Mode Optimizations */
 @media (prefers-color-scheme: dark) {
   .nvidia-build-card {
diff --git a/nvidia/txt2kg/assets/scripts/gnn/preprocess_data.py b/nvidia/txt2kg/assets/scripts/gnn/preprocess_data.py
index 73eced4..82be61f 100644
--- a/nvidia/txt2kg/assets/scripts/gnn/preprocess_data.py
+++ b/nvidia/txt2kg/assets/scripts/gnn/preprocess_data.py
@@ -90,92 +90,57 @@ def parse_args():
     return parser.parse_args()
 
 
-def load_triples_from_arangodb(arango_url, arango_db, arango_user, arango_password):
-    """
-    Load triples from ArangoDB for use with the TXT2KG dataset
-
-    Args:
-        arango_url: ArangoDB connection URL
-        arango_db: ArangoDB database name
-        arango_user: ArangoDB username
-        arango_password: ArangoDB password
-
-    Returns:
-        Array of triples in the format expected by create_remote_backend_from_triplets
+def load_triples_from_arangodb(arango_url: str, arango_db: str, arango_user: str, arango_password: str) -> list[str]:
     """
+    Load triples from ArangoDB for use with the TXT2KG dataset
+
+    Args:
+        arango_url: ArangoDB connection URL
+        arango_db: ArangoDB database name
+        arango_user: ArangoDB username
+        arango_password: ArangoDB password
+
+    Returns:
+        List of triples in the format "subject predicate object"
+    """
     try:
         # Connect to ArangoDB
         client = ArangoClient(hosts=arango_url)
-        
+
         # Get database (no auth in our docker setup)
         if arango_user and arango_password:
            db = client.db(arango_db, username=arango_user, password=arango_password)
         else:
            db = client.db(arango_db)
-        
-        # Query to get all triples from ArangoDB as structured objects
-        # Handle case sensitivity and trim whitespace
+
+        # Query to get all triples from ArangoDB
+        # Handle case sensitivity, trim whitespace, and deduplication
         aql_query = """
         FOR e IN relationships
-            LET subject = TRIM(DOCUMENT(e._from).name)
-            LET object = TRIM(DOCUMENT(e._to).name)
-            LET predicate = TRIM(e.type)
-            FILTER subject != "" AND predicate != "" AND object != ""
-            RETURN {
-                subject: subject,
-                predicate: predicate,
-                object: object
-            }
+            LET subject = TRIM(DOCUMENT(e._from).name)
+            LET object = TRIM(DOCUMENT(e._to).name)
+            LET predicate = TRIM(e.type)
+            FILTER subject != "" AND predicate != "" AND object != ""
+            COLLECT s = subject, p = predicate, o = object
+            RETURN CONCAT_SEPARATOR(" ", s, p, o)
         """
-        
-        # Execute the query
-        cursor = db.aql.execute(aql_query)
-        triple_dicts = list(cursor)
-        
-        # Format triples as strings in the format expected by PyTorch Geometric
-        # The expected format is a list of strings in the form "subject predicate object"
-        triples = format_triples_for_pytorch_geometric(triple_dicts)
-        
+
+        # Execute the query with streaming for large datasets
+        cursor = db.aql.execute(aql_query, stream=True, batch_size=1000)
+        triples = list(cursor)
+
         print(f"Loaded {len(triples)} triples from ArangoDB")
         # Print sample triples for debugging
         if len(triples) > 0:
             print("Sample triples:")
             for i in range(min(3, len(triples))):
                 print(f"  {triples[i]}")
-        
+
         return triples
     except Exception as error:
         print(f"Error loading triples from ArangoDB: {error}")
         raise error
 
 
-def format_triples_for_pytorch_geometric(triple_dicts):
-    """
-    Format triples from ArangoDB into the format expected by PyTorch Geometric
-
-    Args:
-        triple_dicts: List of dictionaries with subject, predicate, object keys
-
-    Returns:
-        List of strings in the format "subject predicate object"
-    """
-    triples = []
-    # Create a set to avoid duplicates
-    unique_triples = set()
-
-    for triple_dict in triple_dicts:
-        # Skip any triple with empty values
-        if not triple_dict['subject'] or not triple_dict['predicate'] or not triple_dict['object']:
-            continue
-
-        # Create a space-separated string in the format that preprocess_triplet expects
-        triple_str = f"{triple_dict['subject']} {triple_dict['predicate']} {triple_dict['object']}"
-
-        # Only add if not already in the set
-        if triple_str not in unique_triples:
-            unique_triples.add(triple_str)
-            triples.append(triple_str)
-
-    return triples
 
 def get_data(args):
     # need a JSON dict of Questions and answers, see below for how its used
@@ -190,48 +155,6 @@ def get_data(args):
     return json_obj, text_contexts
 
 
-def validate_triple_format(triples):
-    """
-    Validate and fix triple format if needed to ensure compatibility with preprocess_triplet
-
-    Args:
-        triples: List of triples to validate
-
-    Returns:
-        Fixed list of triples in the format expected by preprocess_triplet
-    """
-    validated_triples = []
-
-    print(f"Validating {len(triples)} triples...")
-    for i, triple in enumerate(triples):
-        # If triple is already a proper string with subject, predicate, object
-        if isinstance(triple, str):
-            parts = triple.split()
-            # Ensure there are at least 3 parts (subject, predicate, object)
-            if len(parts) >= 3:
-                # For strings with more than 3 parts, use first as subject, second as predicate,
-                # and join the rest as object
-                subject = parts[0]
-                predicate = parts[1]
-                obj = ' '.join(parts[2:])
-                validated_triple = f"{subject} {predicate} {obj}"
-                validated_triples.append(validated_triple)
-            else:
-                print(f"Warning: Triple at index {i} has fewer than 3 parts: {triple}")
-        # If triple is a dictionary with subject, predicate, object keys
-        elif isinstance(triple, dict) and 'subject' in triple and 'predicate' in triple and 'object' in triple:
-            validated_triple = f"{triple['subject']} {triple['predicate']} {triple['object']}"
-            validated_triples.append(validated_triple)
-        # If triple is a tuple or list of length 3
-        elif (isinstance(triple, tuple) or isinstance(triple, list)) and len(triple) == 3:
-            validated_triple = f"{triple[0]} {triple[1]} {triple[2]}"
-            validated_triples.append(validated_triple)
-        else:
-            print(f"Warning: Skipping triple at index {i} with invalid format: {triple}")
-
-    print(f"Validation complete. {len(validated_triples)} valid triples out of {len(triples)}")
-    return validated_triples
-
 def make_dataset(args):
     """Modified make_dataset function that can use ArangoDB as a data source"""
     # Create output directory if it doesn't exist
@@ -257,13 +180,11 @@ def make_dataset(args):
         # Load triples from ArangoDB instead of generating with TXT2KG
         print("Loading triples from ArangoDB...")
         triples = load_triples_from_arangodb(
-            args.arango_url, 
-            args.arango_db, 
-            args.arango_user, 
+            args.arango_url,
+            args.arango_db,
+            args.arango_user,
             args.arango_password
         )
-        # Validate and fix triples format if needed
-        triples = validate_triple_format(triples)
         # Save triples for future use
         torch.save(triples, triples_path)
     else:
diff --git a/nvidia/txt2kg/assets/scripts/setup-pinecone.js b/nvidia/txt2kg/assets/scripts/setup-pinecone.js
index 4b17b75..37f4196 100644
--- a/nvidia/txt2kg/assets/scripts/setup-pinecone.js
+++ b/nvidia/txt2kg/assets/scripts/setup-pinecone.js
@@ -1,19 +1,3 @@
-//
-// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// /** * Simplified Pinecone setup script for Docker environments */ diff --git a/nvidia/txt2kg/assets/start.sh b/nvidia/txt2kg/assets/start.sh index a386dcd..e027711 100755 --- a/nvidia/txt2kg/assets/start.sh +++ b/nvidia/txt2kg/assets/start.sh @@ -20,7 +20,8 @@ # Parse command line arguments DEV_FRONTEND=false -USE_COMPLETE=false +USE_VLLM=false +USE_VECTOR_SEARCH=false while [[ $# -gt 0 ]]; do case $1 in @@ -28,8 +29,12 @@ while [[ $# -gt 0 ]]; do DEV_FRONTEND=true shift ;; - --complete) - USE_COMPLETE=true + --vllm) + USE_VLLM=true + shift + ;; + --vector-search) + USE_VECTOR_SEARCH=true shift ;; --help|-h) @@ -37,14 +42,17 @@ while [[ $# -gt 0 ]]; do echo "" echo "Options:" echo " --dev-frontend Run frontend in development mode (without Docker)" - echo " --complete Use complete stack (vLLM, Pinecone, Sentence Transformers)" + echo " --vllm Use Neo4j + vLLM (GPU-accelerated, for DGX Spark/GB300)" + echo " --vector-search Enable vector search services (Qdrant + Sentence Transformers)" echo " --help, -h Show this help message" echo "" - echo "Default: Starts minimal stack with Ollama, ArangoDB, and Next.js frontend" + echo "Default: Starts ArangoDB + Ollama" echo "" echo "Examples:" - echo " ./start.sh # Start minimal demo (recommended)" - echo " ./start.sh --complete # Start with all optional services" + echo " ./start.sh # Default: ArangoDB + Ollama" + echo " ./start.sh --vllm # Use Neo4j + vLLM (GPU)" + echo " ./start.sh --vector-search # Add Qdrant + Sentence Transformers" + echo " ./start.sh --vllm --vector-search # vLLM + vector search" exit 0 ;; *) @@ -120,21 +128,32 @@ if ! docker info &> /dev/null; then fi echo "✓ Docker permissions OK" -# Build the docker-compose command -if [ "$USE_COMPLETE" = true ]; then - CMD="$DOCKER_COMPOSE_CMD -f $(pwd)/deploy/compose/docker-compose.complete.yml" - echo "Using complete stack (Ollama, vLLM, Pinecone, Sentence Transformers)..." 
+# Select compose file and build command +COMPOSE_DIR="$(pwd)/deploy/compose" +PROFILES="" + +if [ "$USE_VLLM" = true ]; then + COMPOSE_FILE="$COMPOSE_DIR/docker-compose.vllm.yml" + echo "Using Neo4j + vLLM (GPU-accelerated)..." + echo " ⚡ Optimized for DGX Spark/GB300 with unified memory support" else - CMD="$DOCKER_COMPOSE_CMD -f $(pwd)/deploy/compose/docker-compose.yml" - echo "Using minimal configuration (Ollama + ArangoDB only)..." + COMPOSE_FILE="$COMPOSE_DIR/docker-compose.yml" + echo "Using ArangoDB + Ollama configuration..." +fi + +CMD="$DOCKER_COMPOSE_CMD -f $COMPOSE_FILE" + +if [ "$USE_VECTOR_SEARCH" = true ]; then + PROFILES="--profile vector-search" + echo "Enabling vector search (Qdrant + Sentence Transformers)..." fi # Execute the command echo "" echo "Starting services..." -echo "Running: $CMD up -d" +echo "Running: $CMD $PROFILES up -d" cd $(dirname "$0") -eval "$CMD up -d" +eval "$CMD $PROFILES up -d" echo "" echo "==========================================" @@ -143,28 +162,44 @@ echo "==========================================" echo "" echo "Core Services:" echo " • Web UI: http://localhost:3001" -echo " • ArangoDB: http://localhost:8529" -echo " • Ollama API: http://localhost:11434" +if [ "$USE_VLLM" = true ]; then + echo " • Neo4j Browser: http://localhost:7474" + echo " • vLLM API: http://localhost:8001 (GPU-accelerated)" +else + echo " • ArangoDB: http://localhost:8529" + echo " • Ollama API: http://localhost:11434" +fi echo "" -if [ "$USE_COMPLETE" = true ]; then - echo "Additional Services (Complete Stack):" - echo " • Local Pinecone: http://localhost:5081" +if [ "$USE_VECTOR_SEARCH" = true ]; then + echo "Vector Search Services:" + echo " • Qdrant: http://localhost:6333" echo " • Sentence Transformers: http://localhost:8000" - echo " • vLLM API: http://localhost:8001" echo "" fi echo "Next steps:" -echo " 1. Pull an Ollama model (if not already done):" -echo " docker exec ollama-compose ollama pull llama3.1:8b" -echo "" -echo " 2. 
Open http://localhost:3001 in your browser" +if [ "$USE_VLLM" = true ]; then + echo " 1. Wait for vLLM to load the model (check logs with: docker logs vllm-service -f)" + echo " Note: First startup may take several minutes to download the model" + echo "" + echo " 2. Open http://localhost:3001 in your browser" +else + echo " 1. Pull an Ollama model (if not already done):" + echo " docker exec ollama-compose ollama pull llama3.1:8b" + echo "" + echo " 2. Open http://localhost:3001 in your browser" +fi echo " 3. Upload documents and start building your knowledge graph!" echo "" echo "Other options:" echo " • Stop services: ./stop.sh" echo " • Run frontend in dev mode: ./start.sh --dev-frontend" -echo " • Use complete stack: ./start.sh --complete" +if [ "$USE_VLLM" = true ]; then + echo " • Use Ollama: ./start.sh (without --vllm)" +else + echo " • Use vLLM (GPU): ./start.sh --vllm" +fi +echo " • Add vector search: ./start.sh --vector-search" echo " • View logs: docker compose logs -f" -echo "" +echo "" diff --git a/nvidia/txt2kg/assets/stop.sh b/nvidia/txt2kg/assets/stop.sh index d67aaf6..650671d 100755 --- a/nvidia/txt2kg/assets/stop.sh +++ b/nvidia/txt2kg/assets/stop.sh @@ -18,27 +18,40 @@ # Stop script for txt2kg project +# Check which Docker Compose version is available +DOCKER_COMPOSE_CMD="" +if docker compose version &> /dev/null; then + DOCKER_COMPOSE_CMD="docker compose" +elif command -v docker-compose &> /dev/null; then + DOCKER_COMPOSE_CMD="docker-compose" +else + echo "Error: Neither 'docker compose' nor 'docker-compose' is available" + exit 1 +fi + # Parse command line arguments -USE_COMPLETE=false +USE_VLLM=false +USE_VECTOR_SEARCH=false while [[ $# -gt 0 ]]; do case $1 in - --complete) - USE_COMPLETE=true + --vllm) + USE_VLLM=true + shift + ;; + --vector-search) + USE_VECTOR_SEARCH=true shift ;; --help|-h) echo "Usage: ./stop.sh [OPTIONS]" echo "" echo "Options:" - echo " --complete Stop complete stack (vLLM, Pinecone, Sentence Transformers)" + echo " 
--vllm Stop vLLM stack (use if you started with --vllm)" + echo " --vector-search Include vector search services" echo " --help, -h Show this help message" echo "" - echo "Default: Stops minimal stack with Ollama, ArangoDB, and Next.js frontend" - echo "" - echo "Examples:" - echo " ./stop.sh # Stop minimal demo" - echo " ./stop.sh --complete # Stop complete stack" + echo "Note: Use the same flags you used with ./start.sh" exit 0 ;; *) @@ -49,52 +62,26 @@ while [[ $# -gt 0 ]]; do esac done -# Check which Docker Compose version is available -DOCKER_COMPOSE_CMD="" -if docker compose version &> /dev/null; then - DOCKER_COMPOSE_CMD="docker compose" -elif command -v docker-compose &> /dev/null; then - DOCKER_COMPOSE_CMD="docker-compose" +# Select compose file +COMPOSE_DIR="$(pwd)/deploy/compose" +PROFILES="" + +if [ "$USE_VLLM" = true ]; then + COMPOSE_FILE="$COMPOSE_DIR/docker-compose.vllm.yml" else - echo "Error: Neither 'docker compose' nor 'docker-compose' is available" - echo "Please install Docker Compose: https://docs.docker.com/compose/install/" - exit 1 + COMPOSE_FILE="$COMPOSE_DIR/docker-compose.yml" fi -# Check Docker daemon permissions -if ! docker info &> /dev/null; then - echo "" - echo "==========================================" - echo "ERROR: Docker Permission Denied" - echo "==========================================" - echo "" - echo "You don't have permission to connect to the Docker daemon." - echo "" - echo "To fix this, add your user to the docker group:" - echo " sudo usermod -aG docker \$USER" - echo " newgrp docker" - echo "" - exit 1 +CMD="$DOCKER_COMPOSE_CMD -f $COMPOSE_FILE" + +if [ "$USE_VECTOR_SEARCH" = true ]; then + PROFILES="--profile vector-search" fi -# Build the docker-compose command -if [ "$USE_COMPLETE" = true ]; then - CMD="$DOCKER_COMPOSE_CMD -f $(pwd)/deploy/compose/docker-compose.complete.yml" - echo "Stopping complete stack..." 
-else - CMD="$DOCKER_COMPOSE_CMD -f $(pwd)/deploy/compose/docker-compose.yml" - echo "Stopping minimal configuration..." -fi - -# Execute the command -echo "Running: $CMD down" +echo "Stopping txt2kg services..." cd $(dirname "$0") -eval "$CMD down" +eval "$CMD $PROFILES down" echo "" -echo "==========================================" -echo "txt2kg has been stopped" -echo "==========================================" -echo "" +echo "All services stopped." echo "To start again, run: ./start.sh" -echo "" diff --git a/nvidia/vllm/README.md b/nvidia/vllm/README.md index c2fd17b..6f32742 100644 --- a/nvidia/vllm/README.md +++ b/nvidia/vllm/README.md @@ -68,7 +68,8 @@ The following models are supported with vLLM on Spark. All listed models are ava | **Phi-4-multimodal-instruct** | NVFP4 | ✅ | `nvidia/Phi-4-multimodal-instruct-FP4` | | **Phi-4-reasoning-plus** | FP8 | ✅ | `nvidia/Phi-4-reasoning-plus-FP8` | | **Phi-4-reasoning-plus** | NVFP4 | ✅ | `nvidia/Phi-4-reasoning-plus-FP4` | - +| **Nemotron3-Nano** | BF16 | ✅ | `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16` | +| **Nemotron3-Nano** | FP8 | ✅ | `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8` | > [!NOTE] > The Phi-4-multimodal-instruct models require `--trust-remote-code` when launching vLLM. @@ -118,6 +119,12 @@ export LATEST_VLLM_VERSION= docker pull nvcr.io/nvidia/vllm:${LATEST_VLLM_VERSION} ``` +For Nemotron3-Nano model support, use release version `25.12.post1-py3`: + +```bash +docker pull nvcr.io/nvidia/vllm:25.12.post1-py3 +``` + ## Step 3. Test vLLM in container Launch the container and start vLLM server with a test model to verify basic functionality.
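The pull-and-launch flow this README hunk describes can be sketched as below. This is a hedged sketch, not the playbook's exact commands: it assumes the image exposes the standard `vllm serve` CLI, and the model name, port, and `--gpus all` flag are illustrative choices, not values taken from this patch.

```shell
# Pull a vLLM release image (tag shown is the Nemotron3-Nano-capable release
# named above; substitute whichever release you need).
docker pull nvcr.io/nvidia/vllm:25.12.post1-py3

# Launch the container and serve a small test model to verify basic
# functionality. The model name and port here are assumptions for illustration.
docker run --rm --gpus all -p 8000:8000 \
  nvcr.io/nvidia/vllm:25.12.post1-py3 \
  vllm serve Qwen/Qwen2.5-0.5B-Instruct --host 0.0.0.0 --port 8000

# In another terminal, confirm the OpenAI-compatible endpoint is up:
curl http://localhost:8000/v1/models
```

The `/v1/models` check is a convenient smoke test: it returns as soon as the server has finished loading weights, so a successful response means the container, GPU access, and model download all worked.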