From 8974ee9913215fef0087fbc75e5f1c1bcbbb65d3 Mon Sep 17 00:00:00 2001
From: Santosh Bhavani <santosh.bhavani@live.com>
Date: Sat, 25 Oct 2025 10:33:48 -0700
Subject: [PATCH] Improve Pure RAG UI and add query mode tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add query mode badge to answer section showing Pure RAG/Traditional Graph/GraphRAG
- Add collapsible reasoning section for <think> tags in answers
- Add markdown rendering support (bold/italic) in answers
- Fix Pure RAG to properly display answers using llmAnswer state
- Hide empty results message for Pure RAG mode
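- Update metrics sidebar to show average query time per mode (falling back to the overall average when no per-mode data exists) and remove the Relevance Score, Precision and Recall rows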
- Add queryTimesByMode field to metrics API and frontend interfaces
- Disable GraphRAG button with "COMING SOON" badge (requires GNN model)
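- Replace the custom QdrantService in RAGService with LangChain's QdrantVectorStore, mapping payload.text to pageContent via contentPayloadKey
- Initialize the NVIDIA-hosted LLM in RAGService instead of always throwing the "temporarily unavailable" placeholder error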
- Update console logs and query-mode button labels to drop stale Pinecone references
- Add @qdrant/js-client-rest dependency to package.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 .../assets/frontend/app/api/metrics/route.ts  |  33 ++-
 .../assets/frontend/app/rag/metrics/page.tsx  |   1 +
 .../txt2kg/assets/frontend/app/rag/page.tsx   | 119 +++++++---
 .../assets/frontend/components/rag-query.tsx  |  28 +--
 nvidia/txt2kg/assets/frontend/lib/rag.ts      | 222 +++++++++++++-----
 nvidia/txt2kg/assets/frontend/package.json    |   1 +
 6 files changed, 288 insertions(+), 116 deletions(-)
diff --git a/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts b/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts
index ee81e2d..3afea64 100644
--- a/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts
+++ b/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts
@@ -66,11 +66,12 @@ export async function GET(request: NextRequest) {
 
     // Get real query logs instead of mock data
     let queryLogs: QueryLogSummary[] = [];
-    let precision = 0; 
+    let precision = 0;
     let recall = 0;
     let f1Score = 0;
     let avgQueryTime = vectorStats.avgQueryTime || 0;
     let avgRelevance = 0;
+    let queryTimesByMode: Record<string, number> = {};
 
     // Get query logs from file-based logger instead of Neo4j
     try {
@@ -87,25 +88,42 @@ export async function GET(request: NextRequest) {
       // Calculate metrics from the query logs
       if (queryLogs.length > 0) {
         // Calculate metrics from logs with actual data
-        const logsWithMetrics = queryLogs.filter(log => 
-          log.metrics.avgPrecision > 0 || 
-          log.metrics.avgRecall > 0 || 
+        const logsWithMetrics = queryLogs.filter(log =>
+          log.metrics.avgPrecision > 0 ||
+          log.metrics.avgRecall > 0 ||
           log.metrics.avgExecutionTimeMs > 0
         );
-        
+
         const logsWithRelevance = queryLogs.filter(log => log.metrics.avgRelevanceScore > 0);
-        
+
         if (logsWithMetrics.length > 0) {
           precision = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgPrecision || 0), 0) / logsWithMetrics.length;
           recall = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgRecall || 0), 0) / logsWithMetrics.length;
           avgQueryTime = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgExecutionTimeMs || 0), 0) / logsWithMetrics.length;
           f1Score = precision > 0 && recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
         }
-        
+
         if (logsWithRelevance.length > 0) {
           avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length;
         }
       }
+
+      // Calculate per-mode query times
+      const logsByMode = queryLogs.reduce((acc, log) => {
+        const mode = log.queryMode || 'traditional';
+        if (!acc[mode]) acc[mode] = [];
+        acc[mode].push(log);
+        return acc;
+      }, {} as Record<string, typeof queryLogs>);
+
+      Object.entries(logsByMode).forEach(([mode, logs]) => {
+        const logsWithTime = logs.filter(log => log.metrics.avgExecutionTimeMs > 0);
+        if (logsWithTime.length > 0) {
+          queryTimesByMode[mode] = logsWithTime.reduce((sum, log) =>
+            sum + log.metrics.avgExecutionTimeMs, 0
+          ) / logsWithTime.length;
+        }
+      });
     } catch (error) {
       console.warn('Error getting query logs from file:', error);
       // Keep values at 0 instead of using defaults
@@ -132,6 +150,7 @@ export async function GET(request: NextRequest) {
       recall,
       f1Score,
       topQueries,
+      queryTimesByMode: queryTimesByMode || {},
       // Add metadata about query logs
       queryLogStats: {
         totalQueryLogs: queryLogs.length,
diff --git a/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx b/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx
index 304ad72..73cefb6 100644
--- a/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx
+++ b/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx
@@ -17,6 +17,7 @@ interface MetricsData {
   recall: number;
   f1Score: number;
   topQueries: { query: string; count: number }[];
+  queryTimesByMode?: Record<string, number>;
   queryLogStats?: {
     totalQueryLogs: number;
     totalExecutions: number;
diff --git a/nvidia/txt2kg/assets/frontend/app/rag/page.tsx b/nvidia/txt2kg/assets/frontend/app/rag/page.tsx
index 5ca16d3..c5226a6 100644
--- a/nvidia/txt2kg/assets/frontend/app/rag/page.tsx
+++ b/nvidia/txt2kg/assets/frontend/app/rag/page.tsx
@@ -21,6 +21,7 @@ export default function RagPage() {
     avgRelevance: number;
     precision: number;
     recall: number;
+    queryTimesByMode?: Record<string, number>;
   } | null>(null);
   const [currentParams, setCurrentParams] = useState<RagParams>({
     kNeighbors: 4096,
@@ -65,7 +66,8 @@ export default function RagPage() {
             avgQueryTime: data.avgQueryTime,
             avgRelevance: data.avgRelevance,
             precision: data.precision,
-            recall: data.recall
+            recall: data.recall,
+            queryTimesByMode: data.queryTimesByMode
           });
         }
       } catch (error) {
@@ -98,7 +100,7 @@ export default function RagPage() {
       if (params.usePureRag) {
         queryMode = 'pure-rag';
         try {
-          console.log('Using pure RAG with just Pinecone and LangChain for query:', query);
+          console.log('Using pure RAG with Qdrant and NVIDIA LLM for query:', query);
           const ragResponse = await fetch('/api/rag-query', {
             method: 'POST',
             headers: { 'Content-Type': 'application/json' },
@@ -110,16 +112,17 @@ export default function RagPage() {
           
           if (ragResponse.ok) {
             const data = await ragResponse.json();
+            console.log('📥 RAG Response data:', { hasAnswer: !!data.answer, answerLength: data.answer?.length });
             // Handle the answer - we might need to display differently than triples
             if (data.answer) {
-              // Special UI handling for text answer rather than triples
-              setResults([{
-                subject: 'Answer',
-                predicate: '',
-                object: data.answer,
-                usedFallback: data.usedFallback
-              }]);
-              
+              console.log('✅ Setting answer in results:', data.answer.substring(0, 100) + '...');
+
+              // Set the LLM answer for display (same as traditional mode)
+              setLlmAnswer(data.answer);
+
+              // Set empty results array since Pure RAG doesn't return triples
+              setResults([]);
+
               resultCount = 1;
               relevanceScore = data.relevanceScore || 0;
               
@@ -364,22 +367,34 @@ export default function RagPage() {
                 </div>
                 
                 <div className="space-y-3 text-sm">
-                  <div className="flex justify-between">
-                    <span className="text-muted-foreground">Avg. Query Time:</span>
-                    <span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
-                  </div>
-                  <div className="flex justify-between">
-                    <span className="text-muted-foreground">Relevance Score:</span>
-                    <span className="font-medium">{metrics.avgRelevance > 0 ? `${(metrics.avgRelevance * 100).toFixed(1)}%` : "No data"}</span>
-                  </div>
-                  <div className="flex justify-between">
-                    <span className="text-muted-foreground">Precision:</span>
-                    <span className="font-medium">{metrics.precision > 0 ? `${(metrics.precision * 100).toFixed(1)}%` : "No data"}</span>
-                  </div>
-                  <div className="flex justify-between">
-                    <span className="text-muted-foreground">Recall:</span>
-                    <span className="font-medium">{metrics.recall > 0 ? `${(metrics.recall * 100).toFixed(1)}%` : "No data"}</span>
-                  </div>
+                  {/* Query times by mode */}
+                  {metrics.queryTimesByMode && Object.keys(metrics.queryTimesByMode).length > 0 ? (
+                    <>
+                      {metrics.queryTimesByMode['pure-rag'] !== undefined && (
+                        <div className="flex justify-between">
+                          <span className="text-muted-foreground">Pure RAG:</span>
+                          <span className="font-medium">{metrics.queryTimesByMode['pure-rag'].toFixed(2)}ms</span>
+                        </div>
+                      )}
+                      {metrics.queryTimesByMode['traditional'] !== undefined && (
+                        <div className="flex justify-between">
+                          <span className="text-muted-foreground">Traditional Graph:</span>
+                          <span className="font-medium">{metrics.queryTimesByMode['traditional'].toFixed(2)}ms</span>
+                        </div>
+                      )}
+                      {metrics.queryTimesByMode['vector-search'] !== undefined && (
+                        <div className="flex justify-between">
+                          <span className="text-muted-foreground">GraphRAG:</span>
+                          <span className="font-medium">{metrics.queryTimesByMode['vector-search'].toFixed(2)}ms</span>
+                        </div>
+                      )}
+                    </>
+                  ) : (
+                    <div className="flex justify-between">
+                      <span className="text-muted-foreground">Avg. Query Time:</span>
+                      <span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
+                    </div>
+                  )}
                 </div>
               </div>
             )}
@@ -403,24 +418,64 @@ export default function RagPage() {
                     <SearchIcon className="h-3 w-3 text-nvidia-green" />
                   </div>
                   <h3 className="text-lg font-semibold text-foreground">Answer</h3>
+                  {currentParams.queryMode && (
+                    <span className="text-xs px-2.5 py-1 rounded-full font-medium bg-nvidia-green/10 text-nvidia-green border border-nvidia-green/20">
+                      {currentParams.queryMode === 'pure-rag' ? 'Pure RAG' :
+                       currentParams.queryMode === 'vector-search' ? 'GraphRAG' :
+                       'Traditional Graph'}
+                    </span>
+                  )}
                 </div>
                 <div className="prose prose-sm dark:prose-invert max-w-none">
-                  <div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
-                    <p className="text-foreground leading-relaxed whitespace-pre-wrap">{llmAnswer}</p>
-                  </div>
+                  {(() => {
+                    // Split an optional <think>...</think> block (shown as collapsible reasoning) from the answer
+                    const thinkMatch = llmAnswer.match(/<think>([\s\S]*?)<\/think>/);
+                    const thinkContent = thinkMatch ? thinkMatch[1].trim() : null;
+                    const mainAnswer = thinkMatch // strip the tags even when the reasoning inside them is empty
+                      ? llmAnswer.replace(/<think>[\s\S]*?<\/think>/, '').trim()
+                      : llmAnswer;
+
+                    return (
+                      <>
+                        {thinkContent && (
+                          <details className="mb-4 bg-muted/10 border border-border/20 rounded-xl overflow-hidden">
+                            <summary className="cursor-pointer p-4 hover:bg-muted/20 transition-colors flex items-center gap-2">
+                              <svg className="w-4 h-4 transform transition-transform" style={{ transform: 'rotate(0deg)' }} xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
+                                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
+                              </svg>
+                              <span className="text-sm font-medium text-muted-foreground">Reasoning Process</span>
+                            </summary>
+                            <div className="p-4 pt-0 text-sm text-muted-foreground leading-relaxed whitespace-pre-wrap border-t border-border/10">
+                              {thinkContent}
+                            </div>
+                          </details>
+                        )}
+                        <div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
+                          <div
+                            className="text-foreground leading-relaxed whitespace-pre-wrap"
+                            dangerouslySetInnerHTML={{ // LLM output is untrusted: HTML-escape it before adding our own tags
+                              __html: mainAnswer.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
+                                .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
+                                .replace(/\*(.*?)\*/g, '<em>$1</em>')
+                            }}
+                          />
+                        </div>
+                      </>
+                    );
+                  })()}
                 </div>
               </div>
             )}
             
             {/* Results Section */}
-            {results && results.length > 0 && (
+            {results && results.length > 0 && !currentParams.usePureRag && (
               <div className="mt-8 nvidia-build-card">
                 <div className="flex items-center gap-3 mb-6">
                   <div className="w-6 h-6 rounded-md bg-nvidia-green/15 flex items-center justify-center">
                     <SearchIcon className="h-3 w-3 text-nvidia-green" />
                   </div>
                   <h3 className="text-lg font-semibold text-foreground">
-                    {llmAnswer ? `Supporting Triples (${results.length})` : `Results (${results.length})`}
+                    {llmAnswer ? `Retrieved Knowledge (${results.length})` : `Results (${results.length})`}
                   </h3>
                 </div>
                 <div className="space-y-4">
@@ -464,7 +519,7 @@ export default function RagPage() {
               </div>
             )}
             
-            {results && results.length === 0 && !isLoading && (
+            {results && results.length === 0 && !isLoading && !currentParams.usePureRag && (
               <div className="mt-8 nvidia-build-card border-dashed">
                 <div className="text-center py-8">
                   <div className="w-12 h-12 rounded-xl bg-muted/30 flex items-center justify-center mx-auto mb-4">
diff --git a/nvidia/txt2kg/assets/frontend/components/rag-query.tsx b/nvidia/txt2kg/assets/frontend/components/rag-query.tsx
index 9f6333c..93f93fa 100644
--- a/nvidia/txt2kg/assets/frontend/components/rag-query.tsx
+++ b/nvidia/txt2kg/assets/frontend/components/rag-query.tsx
@@ -145,7 +145,7 @@ export function RagQuery({
             </div>
             <span className="text-sm font-semibold">Pure RAG</span>
             <span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
-              Pinecone + LangChain
+              Vector DB + LLM
             </span>
             {queryMode === 'pure-rag' && (
               <div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
@@ -166,7 +166,7 @@ export function RagQuery({
             </div>
             <span className="text-sm font-semibold">Traditional Graph</span>
             <span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
-              Graph database only
+              Graph DB + LLM
             </span>
             {queryMode === 'traditional' && (
               <div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
@@ -176,28 +176,20 @@ export function RagQuery({
           <button
             type="button"
             onClick={() => handleQueryModeChange('vector-search')}
-            disabled={!vectorEnabled}
-            className={`relative flex flex-col items-center p-3 border rounded-lg transition-all duration-200 hover:shadow-md ${
-              queryMode === 'vector-search' 
-                ? 'border-nvidia-green bg-nvidia-green/10 text-nvidia-green shadow-sm' 
-                : vectorEnabled 
-                  ? 'border-border/40 hover:border-border/60 hover:bg-muted/20' 
-                  : 'border-border/30 opacity-50 cursor-not-allowed'
-            }`}
+            disabled={true}
+            title="GraphRAG requires a GNN model (not yet available)"
+            className="relative flex flex-col items-center p-3 border rounded-lg border-border/30 opacity-50 cursor-not-allowed"
           >
-            <div className="w-5 h-5 rounded-md bg-nvidia-green/15 flex items-center justify-center mb-1.5">
-              <Cpu className="h-2.5 w-2.5 text-nvidia-green" />
+            <div className="w-5 h-5 rounded-md bg-muted/15 flex items-center justify-center mb-1.5">
+              <Cpu className="h-2.5 w-2.5 text-muted-foreground" />
             </div>
-            <span className="text-sm font-semibold">GraphRAG</span>
+            <span className="text-sm font-semibold text-muted-foreground">GraphRAG</span>
             <span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
               RAG + GNN
             </span>
-            <div className="text-[9px] px-1.5 py-0.5 bg-nvidia-green/20 text-nvidia-green rounded mt-1 font-medium">
-              NEW
+            <div className="text-[9px] px-1.5 py-0.5 bg-amber-500/20 text-amber-700 dark:text-amber-500 rounded mt-1 font-medium">
+              COMING SOON
             </div>
-            {queryMode === 'vector-search' && (
-              <div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
-            )}
           </button>
         </div>
       </div>
diff --git a/nvidia/txt2kg/assets/frontend/lib/rag.ts b/nvidia/txt2kg/assets/frontend/lib/rag.ts
index f9bb117..dba9b26 100644
--- a/nvidia/txt2kg/assets/frontend/lib/rag.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/rag.ts
@@ -1,7 +1,7 @@
 /**
  * Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain
  * This module provides a RetrievalQA chain using Qdrant as the vector store
- * Note: xAI integration has been removed - needs alternative LLM provider implementation
+ * Uses NVIDIA API for LLM inference
  */
 
 import { ChatOpenAI } from "@langchain/openai";
@@ -9,27 +9,42 @@ import { Document } from "@langchain/core/documents";
 import { RunnableSequence } from "@langchain/core/runnables";
 import { StringOutputParser } from "@langchain/core/output_parsers";
 import { PromptTemplate } from "@langchain/core/prompts";
-import { QdrantService, DocumentSearchResult } from './qdrant';
+import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";
+import { Embeddings } from "@langchain/core/embeddings";
 import { EmbeddingsService } from './embeddings';
 
-// Interface for records to store in Qdrant
-interface QdrantRecord {
-  id: string;
-  values: number[];
-  metadata?: Record<string, any>;
+// Custom embeddings adapter to use our EmbeddingsService with LangChain
+class CustomEmbeddings extends Embeddings {
+  private embeddingsService: EmbeddingsService;
+
+  constructor() {
+    super({});
+    this.embeddingsService = EmbeddingsService.getInstance();
+  }
+
+  async embedDocuments(documents: string[]): Promise<number[][]> {
+    await this.embeddingsService.initialize();
+    return await this.embeddingsService.encode(documents);
+  }
+
+  async embedQuery(query: string): Promise<number[]> {
+    await this.embeddingsService.initialize();
+    const embeddings = await this.embeddingsService.encode([query]);
+    return embeddings[0];
+  }
 }
 
 export class RAGService {
   private static instance: RAGService;
-  private pineconeService: QdrantService;
-  private embeddingsService: EmbeddingsService;
+  private vectorStore: QdrantVectorStore | null = null;
+  private embeddingsService: CustomEmbeddings;
   private llm: ChatOpenAI | null = null;
   private initialized: boolean = false;
   private isInitializing: boolean = false;
+  private collectionName: string = 'entity-embeddings';
 
   private constructor() {
-    this.pineconeService = QdrantService.getInstance();
-    this.embeddingsService = EmbeddingsService.getInstance();
+    this.embeddingsService = new CustomEmbeddings();
   }
 
   /**
@@ -51,28 +66,56 @@ export class RAGService {
     }
 
     this.isInitializing = true;
-    
+
     try {
       console.log('Initializing RAG service...');
 
-      // Initialize dependencies
-      await this.pineconeService.initialize();
-      await this.embeddingsService.initialize();
-
-      // Initialize LLM - Try NVIDIA first, then fall back to error
+      // Check for NVIDIA API key
       const nvidiaApiKey = process.env.NVIDIA_API_KEY;
       if (!nvidiaApiKey) {
-        throw new Error('RAG service requires NVIDIA_API_KEY to be set in environment variables. xAI integration has been removed.');
+        const error = new Error('NVIDIA_API_KEY is required for RAG service. Please set the NVIDIA_API_KEY environment variable.');
+        console.error('❌ RAG Initialization Error:', error.message);
+        throw error;
       }
-      
-      // Note: This is a placeholder - NVIDIA LLM integration would need to be implemented
-      // For now, we'll throw an error to indicate RAG service is not available
-      throw new Error('RAG service is temporarily unavailable after xAI removal. Please implement alternative LLM provider.');
+
+      // Initialize NVIDIA LLM using ChatOpenAI with NVIDIA's base URL
+      this.llm = new ChatOpenAI({
+        modelName: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
+        temperature: 0.2,
+        maxTokens: 1024,
+        openAIApiKey: nvidiaApiKey,
+        configuration: {
+          baseURL: "https://integrate.api.nvidia.com/v1",
+          timeout: 120000, // 120 second timeout
+        },
+        modelKwargs: {
+          top_p: 0.95,
+          frequency_penalty: 0,
+          presence_penalty: 0
+        }
+      });
+
+      console.log('✅ NVIDIA LLM initialized successfully');
+
+      // Initialize Qdrant vector store
+      const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333';
+
+      this.vectorStore = await QdrantVectorStore.fromExistingCollection(
+        this.embeddingsService,
+        {
+          url: qdrantUrl,
+          collectionName: this.collectionName,
+          contentPayloadKey: 'text', // Map payload.text to pageContent
+        }
+      );
+
+      console.log('✅ Qdrant vector store connected successfully');
 
       this.initialized = true;
-      console.log('RAG service initialized successfully');
+      console.log('✅ RAG service initialized successfully');
     } catch (error) {
-      console.error('Error initializing RAG service:', error);
+      console.error('❌ Error initializing RAG service:', error);
+      this.isInitializing = false;
       throw error;
     } finally {
       this.isInitializing = false;
@@ -80,7 +123,7 @@ export class RAGService {
   }
 
   /**
-   * Store documents in Pinecone for retrieval
+   * Store documents in Qdrant for retrieval
    * @param documents Array of text documents to store
    * @param metadata Optional metadata for the documents
    */
@@ -97,29 +140,28 @@ export class RAGService {
       return;
     }
 
-    console.log(`Storing ${documents.length} documents in Pinecone`);
+    if (!this.vectorStore) {
+      throw new Error('Vector store not initialized');
+    }
 
-    // Generate embeddings for documents
-    const embeddings = await this.embeddingsService.encode(documents);
+    console.log(`Storing ${documents.length} documents in Qdrant`);
 
-    // Prepare records for Pinecone
-    const records: PineconeRecord[] = embeddings.map((embedding, i) => ({
-      id: `doc_${Date.now()}_${i}`,
-      values: embedding,
+    // Create Document objects with metadata
+    const docs = documents.map((text, i) => new Document({
+      pageContent: text,
       metadata: {
-        text: documents[i],
         timestamp: new Date().toISOString(),
         ...(metadata && metadata[i] ? metadata[i] : {})
       }
     }));
 
-    // Store in Pinecone
-    await this.pineconeService.upsertVectors(records);
-    console.log(`Successfully stored ${records.length} document embeddings`);
+    // Store in Qdrant using LangChain
+    await this.vectorStore.addDocuments(docs);
+    console.log(`✅ Successfully stored ${docs.length} document embeddings`);
   }
 
   /**
-   * Perform question answering with document retrieval
+   * Perform question answering with document retrieval using proper RAG implementation
    * @param query User query
    * @param topK Number of most similar documents to retrieve
    * @returns Answer generated from relevant context
@@ -133,15 +175,18 @@ export class RAGService {
       throw new Error('LLM not initialized');
     }
 
-    // Generate embedding for query
-    const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
+    if (!this.vectorStore) {
+      throw new Error('Vector store not initialized');
+    }
+
+    console.log(`🔍 Performing RAG query with topK=${topK}`);
+
+    // Use LangChain's similarity search to retrieve relevant documents
+    const similarDocs = await this.vectorStore.similaritySearch(query, topK);
 
-    // Retrieve similar documents from Pinecone
-    const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
-    
     if (!similarDocs || similarDocs.length === 0) {
-      console.log('No relevant documents found, falling back to LLM');
-      
+      console.log('⚠️ No relevant documents found, falling back to LLM general knowledge');
+
       // Define prompt template for standalone LLM response
       const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
 You are a helpful assistant answering questions based on your general knowledge.
@@ -167,15 +212,64 @@ Answer:
       return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
     }
 
+    console.log(`✅ Found ${similarDocs.length} relevant documents`);
+
+    // Log first document structure for debugging
+    if (similarDocs.length > 0) {
+      console.log('📄 First document structure:', {
+        hasPageContent: !!similarDocs[0].pageContent,
+        pageContentLength: similarDocs[0].pageContent?.length || 0,
+        hasMetadata: !!similarDocs[0].metadata,
+        metadataKeys: similarDocs[0].metadata ? Object.keys(similarDocs[0].metadata) : []
+      });
+    }
+
     // Extract text from retrieved documents
+    // Support both pageContent (LangChain standard) and metadata.text (legacy format)
     const context = similarDocs
-      .map((doc: DocumentSearchResult) => doc.metadata?.text || '')
-      .filter((text: string) => text.length > 0)
+      .map((doc) => {
+        // Try pageContent first (LangChain standard)
+        if (doc.pageContent && doc.pageContent.trim().length > 0) {
+          return doc.pageContent;
+        }
+        // Fall back to metadata.text (legacy Qdrant storage format)
+        if (doc.metadata?.text && doc.metadata.text.trim().length > 0) {
+          return doc.metadata.text;
+        }
+        return '';
+      })
+      .filter((text) => text.length > 0)
       .join('\n\n');
 
-    // Define prompt template for QA
+    console.log(`📝 Extracted context length: ${context.length} characters`);
+
+    if (!context || context.trim().length === 0) {
+      console.log('⚠️ Retrieved documents have no content, falling back to LLM');
+      const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
+You are a helpful assistant answering questions based on your general knowledge.
+
+Question: {query}
+
+Answer:
+`);
+
+      const fallbackChain = RunnableSequence.from([
+        {
+          query: () => query,
+        },
+        fallbackPromptTemplate,
+        this.llm,
+        new StringOutputParser(),
+      ]);
+
+      const answer = await fallbackChain.invoke({});
+      return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
+    }
+
+    // Define prompt template for RAG
     const promptTemplate = PromptTemplate.fromTemplate(`
-Answer the question based only on the following context:
+Answer the question based only on the following context from the knowledge base.
+If you cannot find the answer in the context, say "I cannot find this information in the knowledge base."
 
 Context:
 {context}
@@ -185,7 +279,7 @@ Question: {query}
 Answer:
 `);
 
-    // Create retrieval chain
+    // Create retrieval chain using RunnableSequence
     const retrievalChain = RunnableSequence.from([
       {
         context: () => context,
@@ -196,9 +290,18 @@ Answer:
       new StringOutputParser(),
     ]);
 
+    console.log('🤖 Generating answer with NVIDIA LLM...');
+
     // Execute chain
-    const answer = await retrievalChain.invoke({});
-    return answer;
+    try {
+      const answer = await retrievalChain.invoke({});
+      console.log('✅ RAG query completed successfully');
+      console.log(`📝 Answer length: ${answer.length} characters`);
+      return answer;
+    } catch (error) {
+      console.error('❌ Error generating answer with NVIDIA LLM:', error);
+      throw error;
+    }
   }
 
   /**
@@ -215,15 +318,16 @@ Answer:
       await this.initialize();
     }
 
-    // Generate embedding for query
-    const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
+    if (!this.vectorStore) {
+      throw new Error('Vector store not initialized');
+    }
 
-    // Retrieve similar documents from Pinecone
-    const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
-    
-    return similarDocs.map((doc: DocumentSearchResult) => ({
-      text: doc.metadata?.text || '',
-      score: doc.score,
+    // Use LangChain's similarity search with scores
+    const results = await this.vectorStore.similaritySearchWithScore(query, topK);
+
+    return results.map(([doc, score]) => ({
+      text: doc.pageContent,
+      score: score,
       metadata: doc.metadata
     }));
   }
diff --git a/nvidia/txt2kg/assets/frontend/package.json b/nvidia/txt2kg/assets/frontend/package.json
index afd5fdc..ff15db8 100644
--- a/nvidia/txt2kg/assets/frontend/package.json
+++ b/nvidia/txt2kg/assets/frontend/package.json
@@ -19,6 +19,7 @@
     "@langchain/community": "^0.3.40",
     "@langchain/core": "^0.3.43",
     "@langchain/openai": "^0.5.2",
+    "@qdrant/js-client-rest": "^1.11.0",
     "@radix-ui/react-alert-dialog": "^1.1.4",
     "@radix-ui/react-avatar": "^1.1.2",
     "@radix-ui/react-checkbox": "^1.1.3",