From 8974ee9913215fef0087fbc75e5f1c1bcbbb65d3 Mon Sep 17 00:00:00 2001 From: Santosh Bhavani Date: Sat, 25 Oct 2025 10:33:48 -0700 Subject: [PATCH] Improve Pure RAG UI and add query mode tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add query mode badge to answer section showing Pure RAG/Traditional Graph/GraphRAG - Add collapsible reasoning section for <think> tags in answers - Add markdown rendering support (bold/italic) in answers - Fix Pure RAG to properly display answers using llmAnswer state - Hide empty results message for Pure RAG mode - Update metrics sidebar to show query times by mode instead of overall average - Add queryTimesByMode field to metrics API and frontend interfaces - Disable GraphRAG button with "COMING SOON" badge (requires GNN model) - Fix Qdrant vector store document mapping with contentPayloadKey - Update console logs to reflect Qdrant instead of Pinecone - Add @qdrant/js-client-rest dependency to package.json 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../assets/frontend/app/api/metrics/route.ts | 33 ++- .../assets/frontend/app/rag/metrics/page.tsx | 1 + .../txt2kg/assets/frontend/app/rag/page.tsx | 119 +++++++--- .../assets/frontend/components/rag-query.tsx | 28 +-- nvidia/txt2kg/assets/frontend/lib/rag.ts | 222 +++++++++++++----- nvidia/txt2kg/assets/frontend/package.json | 1 + 6 files changed, 288 insertions(+), 116 deletions(-) diff --git a/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts b/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts index ee81e2d..3afea64 100644 --- a/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts +++ b/nvidia/txt2kg/assets/frontend/app/api/metrics/route.ts @@ -66,11 +66,12 @@ export async function GET(request: NextRequest) { // Get real query logs instead of mock data let queryLogs: QueryLogSummary[] = []; - let precision = 0; + let precision = 0; let recall = 0; let f1Score = 0; let avgQueryTime 
= vectorStats.avgQueryTime || 0; let avgRelevance = 0; + let queryTimesByMode: Record = {}; // Get query logs from file-based logger instead of Neo4j try { @@ -87,25 +88,42 @@ export async function GET(request: NextRequest) { // Calculate metrics from the query logs if (queryLogs.length > 0) { // Calculate metrics from logs with actual data - const logsWithMetrics = queryLogs.filter(log => - log.metrics.avgPrecision > 0 || - log.metrics.avgRecall > 0 || + const logsWithMetrics = queryLogs.filter(log => + log.metrics.avgPrecision > 0 || + log.metrics.avgRecall > 0 || log.metrics.avgExecutionTimeMs > 0 ); - + const logsWithRelevance = queryLogs.filter(log => log.metrics.avgRelevanceScore > 0); - + if (logsWithMetrics.length > 0) { precision = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgPrecision || 0), 0) / logsWithMetrics.length; recall = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgRecall || 0), 0) / logsWithMetrics.length; avgQueryTime = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgExecutionTimeMs || 0), 0) / logsWithMetrics.length; f1Score = precision > 0 && recall > 0 ? 
2 * (precision * recall) / (precision + recall) : 0; } - + if (logsWithRelevance.length > 0) { avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length; } } + + // Calculate per-mode query times + const logsByMode = queryLogs.reduce((acc, log) => { + const mode = log.queryMode || 'traditional'; + if (!acc[mode]) acc[mode] = []; + acc[mode].push(log); + return acc; + }, {} as Record); + + Object.entries(logsByMode).forEach(([mode, logs]) => { + const logsWithTime = logs.filter(log => log.metrics.avgExecutionTimeMs > 0); + if (logsWithTime.length > 0) { + queryTimesByMode[mode] = logsWithTime.reduce((sum, log) => + sum + log.metrics.avgExecutionTimeMs, 0 + ) / logsWithTime.length; + } + }); } catch (error) { console.warn('Error getting query logs from file:', error); // Keep values at 0 instead of using defaults @@ -132,6 +150,7 @@ export async function GET(request: NextRequest) { recall, f1Score, topQueries, + queryTimesByMode: queryTimesByMode || {}, // Add metadata about query logs queryLogStats: { totalQueryLogs: queryLogs.length, diff --git a/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx b/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx index 304ad72..73cefb6 100644 --- a/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx +++ b/nvidia/txt2kg/assets/frontend/app/rag/metrics/page.tsx @@ -17,6 +17,7 @@ interface MetricsData { recall: number; f1Score: number; topQueries: { query: string; count: number }[]; + queryTimesByMode?: Record; queryLogStats?: { totalQueryLogs: number; totalExecutions: number; diff --git a/nvidia/txt2kg/assets/frontend/app/rag/page.tsx b/nvidia/txt2kg/assets/frontend/app/rag/page.tsx index 5ca16d3..c5226a6 100644 --- a/nvidia/txt2kg/assets/frontend/app/rag/page.tsx +++ b/nvidia/txt2kg/assets/frontend/app/rag/page.tsx @@ -21,6 +21,7 @@ export default function RagPage() { avgRelevance: number; precision: number; recall: number; + queryTimesByMode?: 
Record; } | null>(null); const [currentParams, setCurrentParams] = useState({ kNeighbors: 4096, @@ -65,7 +66,8 @@ export default function RagPage() { avgQueryTime: data.avgQueryTime, avgRelevance: data.avgRelevance, precision: data.precision, - recall: data.recall + recall: data.recall, + queryTimesByMode: data.queryTimesByMode }); } } catch (error) { @@ -98,7 +100,7 @@ export default function RagPage() { if (params.usePureRag) { queryMode = 'pure-rag'; try { - console.log('Using pure RAG with just Pinecone and LangChain for query:', query); + console.log('Using pure RAG with Qdrant and NVIDIA LLM for query:', query); const ragResponse = await fetch('/api/rag-query', { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -110,16 +112,17 @@ export default function RagPage() { if (ragResponse.ok) { const data = await ragResponse.json(); + console.log('📥 RAG Response data:', { hasAnswer: !!data.answer, answerLength: data.answer?.length }); // Handle the answer - we might need to display differently than triples if (data.answer) { - // Special UI handling for text answer rather than triples - setResults([{ - subject: 'Answer', - predicate: '', - object: data.answer, - usedFallback: data.usedFallback - }]); - + console.log('✅ Setting answer in results:', data.answer.substring(0, 100) + '...'); + + // Set the LLM answer for display (same as traditional mode) + setLlmAnswer(data.answer); + + // Set empty results array since Pure RAG doesn't return triples + setResults([]); + resultCount = 1; relevanceScore = data.relevanceScore || 0; @@ -364,22 +367,34 @@ export default function RagPage() {
-
- Avg. Query Time: - {metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"} -
-
- Relevance Score: - {metrics.avgRelevance > 0 ? `${(metrics.avgRelevance * 100).toFixed(1)}%` : "No data"} -
-
- Precision: - {metrics.precision > 0 ? `${(metrics.precision * 100).toFixed(1)}%` : "No data"} -
-
- Recall: - {metrics.recall > 0 ? `${(metrics.recall * 100).toFixed(1)}%` : "No data"} -
+ {/* Query times by mode */} + {metrics.queryTimesByMode && Object.keys(metrics.queryTimesByMode).length > 0 ? ( + <> + {metrics.queryTimesByMode['pure-rag'] !== undefined && ( +
+ Pure RAG: + {metrics.queryTimesByMode['pure-rag'].toFixed(2)}ms +
+ )} + {metrics.queryTimesByMode['traditional'] !== undefined && ( +
+ Traditional Graph: + {metrics.queryTimesByMode['traditional'].toFixed(2)}ms +
+ )} + {metrics.queryTimesByMode['vector-search'] !== undefined && ( +
+ GraphRAG: + {metrics.queryTimesByMode['vector-search'].toFixed(2)}ms +
+ )} + + ) : ( +
+ Avg. Query Time: + {metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"} +
+ )}
)} @@ -403,24 +418,64 @@ export default function RagPage() {

Answer

+ {currentParams.queryMode && ( + + {currentParams.queryMode === 'pure-rag' ? 'Pure RAG' : + currentParams.queryMode === 'vector-search' ? 'GraphRAG' : + 'Traditional Graph'} + + )}
-
-

{llmAnswer}

-
+ {(() => { + // Parse tags + const thinkMatch = llmAnswer.match(/([\s\S]*?)<\/think>/); + const thinkContent = thinkMatch ? thinkMatch[1].trim() : null; + const mainAnswer = thinkContent + ? llmAnswer.replace(/[\s\S]*?<\/think>/, '').trim() + : llmAnswer; + + return ( + <> + {thinkContent && ( +
+ + + + + Reasoning Process + +
+ {thinkContent} +
+
+ )} +
+
$1') + .replace(/\*(.*?)\*/g, '$1') + }} + /> +
+ + ); + })()}
)} {/* Results Section */} - {results && results.length > 0 && ( + {results && results.length > 0 && !currentParams.usePureRag && (

- {llmAnswer ? `Supporting Triples (${results.length})` : `Results (${results.length})`} + {llmAnswer ? `Retrieved Knowledge (${results.length})` : `Results (${results.length})`}

@@ -464,7 +519,7 @@ export default function RagPage() {
)} - {results && results.length === 0 && !isLoading && ( + {results && results.length === 0 && !isLoading && !currentParams.usePureRag && (
diff --git a/nvidia/txt2kg/assets/frontend/components/rag-query.tsx b/nvidia/txt2kg/assets/frontend/components/rag-query.tsx index 9f6333c..93f93fa 100644 --- a/nvidia/txt2kg/assets/frontend/components/rag-query.tsx +++ b/nvidia/txt2kg/assets/frontend/components/rag-query.tsx @@ -145,7 +145,7 @@ export function RagQuery({
Pure RAG - Pinecone + LangChain + Vector DB + LLM {queryMode === 'pure-rag' && (
@@ -166,7 +166,7 @@ export function RagQuery({
Traditional Graph - Graph database only + Graph DB + LLM {queryMode === 'traditional' && (
@@ -176,28 +176,20 @@ export function RagQuery({
diff --git a/nvidia/txt2kg/assets/frontend/lib/rag.ts b/nvidia/txt2kg/assets/frontend/lib/rag.ts index f9bb117..dba9b26 100644 --- a/nvidia/txt2kg/assets/frontend/lib/rag.ts +++ b/nvidia/txt2kg/assets/frontend/lib/rag.ts @@ -1,7 +1,7 @@ /** * Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain * This module provides a RetrievalQA chain using Qdrant as the vector store - * Note: xAI integration has been removed - needs alternative LLM provider implementation + * Uses NVIDIA API for LLM inference */ import { ChatOpenAI } from "@langchain/openai"; @@ -9,27 +9,42 @@ import { Document } from "@langchain/core/documents"; import { RunnableSequence } from "@langchain/core/runnables"; import { StringOutputParser } from "@langchain/core/output_parsers"; import { PromptTemplate } from "@langchain/core/prompts"; -import { QdrantService, DocumentSearchResult } from './qdrant'; +import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant"; +import { Embeddings } from "@langchain/core/embeddings"; import { EmbeddingsService } from './embeddings'; -// Interface for records to store in Qdrant -interface QdrantRecord { - id: string; - values: number[]; - metadata?: Record; +// Custom embeddings adapter to use our EmbeddingsService with LangChain +class CustomEmbeddings extends Embeddings { + private embeddingsService: EmbeddingsService; + + constructor() { + super({}); + this.embeddingsService = EmbeddingsService.getInstance(); + } + + async embedDocuments(documents: string[]): Promise { + await this.embeddingsService.initialize(); + return await this.embeddingsService.encode(documents); + } + + async embedQuery(query: string): Promise { + await this.embeddingsService.initialize(); + const embeddings = await this.embeddingsService.encode([query]); + return embeddings[0]; + } } export class RAGService { private static instance: RAGService; - private pineconeService: QdrantService; - private embeddingsService: EmbeddingsService; + private 
vectorStore: QdrantVectorStore | null = null; + private embeddingsService: CustomEmbeddings; private llm: ChatOpenAI | null = null; private initialized: boolean = false; private isInitializing: boolean = false; + private collectionName: string = 'entity-embeddings'; private constructor() { - this.pineconeService = QdrantService.getInstance(); - this.embeddingsService = EmbeddingsService.getInstance(); + this.embeddingsService = new CustomEmbeddings(); } /** @@ -51,28 +66,56 @@ export class RAGService { } this.isInitializing = true; - + try { console.log('Initializing RAG service...'); - // Initialize dependencies - await this.pineconeService.initialize(); - await this.embeddingsService.initialize(); - - // Initialize LLM - Try NVIDIA first, then fall back to error + // Check for NVIDIA API key const nvidiaApiKey = process.env.NVIDIA_API_KEY; if (!nvidiaApiKey) { - throw new Error('RAG service requires NVIDIA_API_KEY to be set in environment variables. xAI integration has been removed.'); + const error = new Error('NVIDIA_API_KEY is required for RAG service. Please set the NVIDIA_API_KEY environment variable.'); + console.error('❌ RAG Initialization Error:', error.message); + throw error; } - - // Note: This is a placeholder - NVIDIA LLM integration would need to be implemented - // For now, we'll throw an error to indicate RAG service is not available - throw new Error('RAG service is temporarily unavailable after xAI removal. 
Please implement alternative LLM provider.'); + + // Initialize NVIDIA LLM using ChatOpenAI with NVIDIA's base URL + this.llm = new ChatOpenAI({ + modelName: "nvidia/llama-3.3-nemotron-super-49b-v1.5", + temperature: 0.2, + maxTokens: 1024, + openAIApiKey: nvidiaApiKey, + configuration: { + baseURL: "https://integrate.api.nvidia.com/v1", + timeout: 120000, // 120 second timeout + }, + modelKwargs: { + top_p: 0.95, + frequency_penalty: 0, + presence_penalty: 0 + } + }); + + console.log('✅ NVIDIA LLM initialized successfully'); + + // Initialize Qdrant vector store + const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333'; + + this.vectorStore = await QdrantVectorStore.fromExistingCollection( + this.embeddingsService, + { + url: qdrantUrl, + collectionName: this.collectionName, + contentPayloadKey: 'text', // Map payload.text to pageContent + } + ); + + console.log('✅ Qdrant vector store connected successfully'); this.initialized = true; - console.log('RAG service initialized successfully'); + console.log('✅ RAG service initialized successfully'); } catch (error) { - console.error('Error initializing RAG service:', error); + console.error('❌ Error initializing RAG service:', error); + this.isInitializing = false; throw error; } finally { this.isInitializing = false; @@ -80,7 +123,7 @@ export class RAGService { } /** - * Store documents in Pinecone for retrieval + * Store documents in Qdrant for retrieval * @param documents Array of text documents to store * @param metadata Optional metadata for the documents */ @@ -97,29 +140,28 @@ export class RAGService { return; } - console.log(`Storing ${documents.length} documents in Pinecone`); + if (!this.vectorStore) { + throw new Error('Vector store not initialized'); + } - // Generate embeddings for documents - const embeddings = await this.embeddingsService.encode(documents); + console.log(`Storing ${documents.length} documents in Qdrant`); - // Prepare records for Pinecone - const records: PineconeRecord[] = 
embeddings.map((embedding, i) => ({ - id: `doc_${Date.now()}_${i}`, - values: embedding, + // Create Document objects with metadata + const docs = documents.map((text, i) => new Document({ + pageContent: text, metadata: { - text: documents[i], timestamp: new Date().toISOString(), ...(metadata && metadata[i] ? metadata[i] : {}) } })); - // Store in Pinecone - await this.pineconeService.upsertVectors(records); - console.log(`Successfully stored ${records.length} document embeddings`); + // Store in Qdrant using LangChain + await this.vectorStore.addDocuments(docs); + console.log(`✅ Successfully stored ${docs.length} document embeddings`); } /** - * Perform question answering with document retrieval + * Perform question answering with document retrieval using proper RAG implementation * @param query User query * @param topK Number of most similar documents to retrieve * @returns Answer generated from relevant context @@ -133,15 +175,18 @@ export class RAGService { throw new Error('LLM not initialized'); } - // Generate embedding for query - const queryEmbedding = (await this.embeddingsService.encode([query]))[0]; + if (!this.vectorStore) { + throw new Error('Vector store not initialized'); + } + + console.log(`🔍 Performing RAG query with topK=${topK}`); + + // Use LangChain's similarity search to retrieve relevant documents + const similarDocs = await this.vectorStore.similaritySearch(query, topK); - // Retrieve similar documents from Pinecone - const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK); - if (!similarDocs || similarDocs.length === 0) { - console.log('No relevant documents found, falling back to LLM'); - + console.log('⚠️ No relevant documents found, falling back to LLM general knowledge'); + // Define prompt template for standalone LLM response const fallbackPromptTemplate = PromptTemplate.fromTemplate(` You are a helpful assistant answering questions based on your general knowledge. 
@@ -167,15 +212,64 @@ Answer: return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`; } + console.log(`✅ Found ${similarDocs.length} relevant documents`); + + // Log first document structure for debugging + if (similarDocs.length > 0) { + console.log('📄 First document structure:', { + hasPageContent: !!similarDocs[0].pageContent, + pageContentLength: similarDocs[0].pageContent?.length || 0, + hasMetadata: !!similarDocs[0].metadata, + metadataKeys: similarDocs[0].metadata ? Object.keys(similarDocs[0].metadata) : [] + }); + } + // Extract text from retrieved documents + // Support both pageContent (LangChain standard) and metadata.text (legacy format) const context = similarDocs - .map((doc: DocumentSearchResult) => doc.metadata?.text || '') - .filter((text: string) => text.length > 0) + .map((doc) => { + // Try pageContent first (LangChain standard) + if (doc.pageContent && doc.pageContent.trim().length > 0) { + return doc.pageContent; + } + // Fall back to metadata.text (legacy Qdrant storage format) + if (doc.metadata?.text && doc.metadata.text.trim().length > 0) { + return doc.metadata.text; + } + return ''; + }) + .filter((text) => text.length > 0) .join('\n\n'); - // Define prompt template for QA + console.log(`📝 Extracted context length: ${context.length} characters`); + + if (!context || context.trim().length === 0) { + console.log('⚠️ Retrieved documents have no content, falling back to LLM'); + const fallbackPromptTemplate = PromptTemplate.fromTemplate(` +You are a helpful assistant answering questions based on your general knowledge. + +Question: {query} + +Answer: +`); + + const fallbackChain = RunnableSequence.from([ + { + query: () => query, + }, + fallbackPromptTemplate, + this.llm, + new StringOutputParser(), + ]); + + const answer = await fallbackChain.invoke({}); + return `[Note: No specific information was found in the knowledge base. 
This answer is based on general knowledge.]\n\n${answer}`; + } + + // Define prompt template for RAG const promptTemplate = PromptTemplate.fromTemplate(` -Answer the question based only on the following context: +Answer the question based only on the following context from the knowledge base. +If you cannot find the answer in the context, say "I cannot find this information in the knowledge base." Context: {context} @@ -185,7 +279,7 @@ Question: {query} Answer: `); - // Create retrieval chain + // Create retrieval chain using RunnableSequence const retrievalChain = RunnableSequence.from([ { context: () => context, @@ -196,9 +290,18 @@ Answer: new StringOutputParser(), ]); + console.log('🤖 Generating answer with NVIDIA LLM...'); + // Execute chain - const answer = await retrievalChain.invoke({}); - return answer; + try { + const answer = await retrievalChain.invoke({}); + console.log('✅ RAG query completed successfully'); + console.log(`📝 Answer length: ${answer.length} characters`); + return answer; + } catch (error) { + console.error('❌ Error generating answer with NVIDIA LLM:', error); + throw error; + } } /** @@ -215,15 +318,16 @@ Answer: await this.initialize(); } - // Generate embedding for query - const queryEmbedding = (await this.embeddingsService.encode([query]))[0]; + if (!this.vectorStore) { + throw new Error('Vector store not initialized'); + } - // Retrieve similar documents from Pinecone - const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK); - - return similarDocs.map((doc: DocumentSearchResult) => ({ - text: doc.metadata?.text || '', - score: doc.score, + // Use LangChain's similarity search with scores + const results = await this.vectorStore.similaritySearchWithScore(query, topK); + + return results.map(([doc, score]) => ({ + text: doc.pageContent, + score: score, metadata: doc.metadata })); } diff --git a/nvidia/txt2kg/assets/frontend/package.json b/nvidia/txt2kg/assets/frontend/package.json index 
afd5fdc..ff15db8 100644 --- a/nvidia/txt2kg/assets/frontend/package.json +++ b/nvidia/txt2kg/assets/frontend/package.json @@ -19,6 +19,7 @@ "@langchain/community": "^0.3.40", "@langchain/core": "^0.3.43", "@langchain/openai": "^0.5.2", + "@qdrant/js-client-rest": "^1.11.0", "@radix-ui/react-alert-dialog": "^1.1.4", "@radix-ui/react-avatar": "^1.1.2", "@radix-ui/react-checkbox": "^1.1.3",