Improve Pure RAG UI and add query mode tracking

- Add query mode badge to answer section showing the active mode (Pure RAG, Traditional Graph, or GraphRAG)
- Add collapsible "Reasoning Process" section that extracts <think> tags from LLM answers
- Add lightweight markdown rendering (bold/italic only) for answer text
- Fix Pure RAG to properly display answers using llmAnswer state
- Hide empty results message for Pure RAG mode
- Update metrics sidebar to show query times by mode instead of overall average
- Add queryTimesByMode field to metrics API and frontend interfaces
- Disable GraphRAG button with "COMING SOON" badge (requires GNN model)
- Fix Qdrant vector store document mapping with contentPayloadKey
- Update console logs to reflect Qdrant instead of Pinecone
- Add @qdrant/js-client-rest dependency to package.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Santosh Bhavani 2025-10-25 10:33:48 -07:00
parent de9c46e97e
commit 8974ee9913
6 changed files with 288 additions and 116 deletions

View File

@ -71,6 +71,7 @@ export async function GET(request: NextRequest) {
let f1Score = 0; let f1Score = 0;
let avgQueryTime = vectorStats.avgQueryTime || 0; let avgQueryTime = vectorStats.avgQueryTime || 0;
let avgRelevance = 0; let avgRelevance = 0;
let queryTimesByMode: Record<string, number> = {};
// Get query logs from file-based logger instead of Neo4j // Get query logs from file-based logger instead of Neo4j
try { try {
@ -106,6 +107,23 @@ export async function GET(request: NextRequest) {
avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length; avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length;
} }
} }
// Calculate per-mode query times
const logsByMode = queryLogs.reduce((acc, log) => {
const mode = log.queryMode || 'traditional';
if (!acc[mode]) acc[mode] = [];
acc[mode].push(log);
return acc;
}, {} as Record<string, typeof queryLogs>);
Object.entries(logsByMode).forEach(([mode, logs]) => {
const logsWithTime = logs.filter(log => log.metrics.avgExecutionTimeMs > 0);
if (logsWithTime.length > 0) {
queryTimesByMode[mode] = logsWithTime.reduce((sum, log) =>
sum + log.metrics.avgExecutionTimeMs, 0
) / logsWithTime.length;
}
});
} catch (error) { } catch (error) {
console.warn('Error getting query logs from file:', error); console.warn('Error getting query logs from file:', error);
// Keep values at 0 instead of using defaults // Keep values at 0 instead of using defaults
@ -132,6 +150,7 @@ export async function GET(request: NextRequest) {
recall, recall,
f1Score, f1Score,
topQueries, topQueries,
queryTimesByMode: queryTimesByMode || {},
// Add metadata about query logs // Add metadata about query logs
queryLogStats: { queryLogStats: {
totalQueryLogs: queryLogs.length, totalQueryLogs: queryLogs.length,

View File

@ -17,6 +17,7 @@ interface MetricsData {
recall: number; recall: number;
f1Score: number; f1Score: number;
topQueries: { query: string; count: number }[]; topQueries: { query: string; count: number }[];
queryTimesByMode?: Record<string, number>;
queryLogStats?: { queryLogStats?: {
totalQueryLogs: number; totalQueryLogs: number;
totalExecutions: number; totalExecutions: number;

View File

@ -21,6 +21,7 @@ export default function RagPage() {
avgRelevance: number; avgRelevance: number;
precision: number; precision: number;
recall: number; recall: number;
queryTimesByMode?: Record<string, number>;
} | null>(null); } | null>(null);
const [currentParams, setCurrentParams] = useState<RagParams>({ const [currentParams, setCurrentParams] = useState<RagParams>({
kNeighbors: 4096, kNeighbors: 4096,
@ -65,7 +66,8 @@ export default function RagPage() {
avgQueryTime: data.avgQueryTime, avgQueryTime: data.avgQueryTime,
avgRelevance: data.avgRelevance, avgRelevance: data.avgRelevance,
precision: data.precision, precision: data.precision,
recall: data.recall recall: data.recall,
queryTimesByMode: data.queryTimesByMode
}); });
} }
} catch (error) { } catch (error) {
@ -98,7 +100,7 @@ export default function RagPage() {
if (params.usePureRag) { if (params.usePureRag) {
queryMode = 'pure-rag'; queryMode = 'pure-rag';
try { try {
console.log('Using pure RAG with just Pinecone and LangChain for query:', query); console.log('Using pure RAG with Qdrant and NVIDIA LLM for query:', query);
const ragResponse = await fetch('/api/rag-query', { const ragResponse = await fetch('/api/rag-query', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
@ -110,15 +112,16 @@ export default function RagPage() {
if (ragResponse.ok) { if (ragResponse.ok) {
const data = await ragResponse.json(); const data = await ragResponse.json();
console.log('📥 RAG Response data:', { hasAnswer: !!data.answer, answerLength: data.answer?.length });
// Handle the answer - we might need to display differently than triples // Handle the answer - we might need to display differently than triples
if (data.answer) { if (data.answer) {
// Special UI handling for text answer rather than triples console.log('✅ Setting answer in results:', data.answer.substring(0, 100) + '...');
setResults([{
subject: 'Answer', // Set the LLM answer for display (same as traditional mode)
predicate: '', setLlmAnswer(data.answer);
object: data.answer,
usedFallback: data.usedFallback // Set empty results array since Pure RAG doesn't return triples
}]); setResults([]);
resultCount = 1; resultCount = 1;
relevanceScore = data.relevanceScore || 0; relevanceScore = data.relevanceScore || 0;
@ -364,22 +367,34 @@ export default function RagPage() {
</div> </div>
<div className="space-y-3 text-sm"> <div className="space-y-3 text-sm">
{/* Query times by mode */}
{metrics.queryTimesByMode && Object.keys(metrics.queryTimesByMode).length > 0 ? (
<>
{metrics.queryTimesByMode['pure-rag'] !== undefined && (
<div className="flex justify-between">
<span className="text-muted-foreground">Pure RAG:</span>
<span className="font-medium">{metrics.queryTimesByMode['pure-rag'].toFixed(2)}ms</span>
</div>
)}
{metrics.queryTimesByMode['traditional'] !== undefined && (
<div className="flex justify-between">
<span className="text-muted-foreground">Traditional Graph:</span>
<span className="font-medium">{metrics.queryTimesByMode['traditional'].toFixed(2)}ms</span>
</div>
)}
{metrics.queryTimesByMode['vector-search'] !== undefined && (
<div className="flex justify-between">
<span className="text-muted-foreground">GraphRAG:</span>
<span className="font-medium">{metrics.queryTimesByMode['vector-search'].toFixed(2)}ms</span>
</div>
)}
</>
) : (
<div className="flex justify-between"> <div className="flex justify-between">
<span className="text-muted-foreground">Avg. Query Time:</span> <span className="text-muted-foreground">Avg. Query Time:</span>
<span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span> <span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
</div> </div>
<div className="flex justify-between"> )}
<span className="text-muted-foreground">Relevance Score:</span>
<span className="font-medium">{metrics.avgRelevance > 0 ? `${(metrics.avgRelevance * 100).toFixed(1)}%` : "No data"}</span>
</div>
<div className="flex justify-between">
<span className="text-muted-foreground">Precision:</span>
<span className="font-medium">{metrics.precision > 0 ? `${(metrics.precision * 100).toFixed(1)}%` : "No data"}</span>
</div>
<div className="flex justify-between">
<span className="text-muted-foreground">Recall:</span>
<span className="font-medium">{metrics.recall > 0 ? `${(metrics.recall * 100).toFixed(1)}%` : "No data"}</span>
</div>
</div> </div>
</div> </div>
)} )}
@ -403,24 +418,64 @@ export default function RagPage() {
<SearchIcon className="h-3 w-3 text-nvidia-green" /> <SearchIcon className="h-3 w-3 text-nvidia-green" />
</div> </div>
<h3 className="text-lg font-semibold text-foreground">Answer</h3> <h3 className="text-lg font-semibold text-foreground">Answer</h3>
{currentParams.queryMode && (
<span className="text-xs px-2.5 py-1 rounded-full font-medium bg-nvidia-green/10 text-nvidia-green border border-nvidia-green/20">
{currentParams.queryMode === 'pure-rag' ? 'Pure RAG' :
currentParams.queryMode === 'vector-search' ? 'GraphRAG' :
'Traditional Graph'}
</span>
)}
</div> </div>
<div className="prose prose-sm dark:prose-invert max-w-none"> <div className="prose prose-sm dark:prose-invert max-w-none">
<div className="bg-muted/20 border border-border/20 p-6 rounded-xl"> {(() => {
<p className="text-foreground leading-relaxed whitespace-pre-wrap">{llmAnswer}</p> // Parse <think> tags
const thinkMatch = llmAnswer.match(/<think>([\s\S]*?)<\/think>/);
const thinkContent = thinkMatch ? thinkMatch[1].trim() : null;
const mainAnswer = thinkContent
? llmAnswer.replace(/<think>[\s\S]*?<\/think>/, '').trim()
: llmAnswer;
return (
<>
{thinkContent && (
<details className="mb-4 bg-muted/10 border border-border/20 rounded-xl overflow-hidden">
<summary className="cursor-pointer p-4 hover:bg-muted/20 transition-colors flex items-center gap-2">
<svg className="w-4 h-4 transform transition-transform" style={{ transform: 'rotate(0deg)' }} xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
</svg>
<span className="text-sm font-medium text-muted-foreground">Reasoning Process</span>
</summary>
<div className="p-4 pt-0 text-sm text-muted-foreground leading-relaxed whitespace-pre-wrap border-t border-border/10">
{thinkContent}
</div> </div>
</details>
)}
<div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
<div
className="text-foreground leading-relaxed whitespace-pre-wrap"
dangerouslySetInnerHTML={{
__html: mainAnswer
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
.replace(/\*(.*?)\*/g, '<em>$1</em>')
}}
/>
</div>
</>
);
})()}
</div> </div>
</div> </div>
)} )}
{/* Results Section */} {/* Results Section */}
{results && results.length > 0 && ( {results && results.length > 0 && !currentParams.usePureRag && (
<div className="mt-8 nvidia-build-card"> <div className="mt-8 nvidia-build-card">
<div className="flex items-center gap-3 mb-6"> <div className="flex items-center gap-3 mb-6">
<div className="w-6 h-6 rounded-md bg-nvidia-green/15 flex items-center justify-center"> <div className="w-6 h-6 rounded-md bg-nvidia-green/15 flex items-center justify-center">
<SearchIcon className="h-3 w-3 text-nvidia-green" /> <SearchIcon className="h-3 w-3 text-nvidia-green" />
</div> </div>
<h3 className="text-lg font-semibold text-foreground"> <h3 className="text-lg font-semibold text-foreground">
{llmAnswer ? `Supporting Triples (${results.length})` : `Results (${results.length})`} {llmAnswer ? `Retrieved Knowledge (${results.length})` : `Results (${results.length})`}
</h3> </h3>
</div> </div>
<div className="space-y-4"> <div className="space-y-4">
@ -464,7 +519,7 @@ export default function RagPage() {
</div> </div>
)} )}
{results && results.length === 0 && !isLoading && ( {results && results.length === 0 && !isLoading && !currentParams.usePureRag && (
<div className="mt-8 nvidia-build-card border-dashed"> <div className="mt-8 nvidia-build-card border-dashed">
<div className="text-center py-8"> <div className="text-center py-8">
<div className="w-12 h-12 rounded-xl bg-muted/30 flex items-center justify-center mx-auto mb-4"> <div className="w-12 h-12 rounded-xl bg-muted/30 flex items-center justify-center mx-auto mb-4">

View File

@ -145,7 +145,7 @@ export function RagQuery({
</div> </div>
<span className="text-sm font-semibold">Pure RAG</span> <span className="text-sm font-semibold">Pure RAG</span>
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight"> <span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
Pinecone + LangChain Vector DB + LLM
</span> </span>
{queryMode === 'pure-rag' && ( {queryMode === 'pure-rag' && (
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div> <div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
@ -166,7 +166,7 @@ export function RagQuery({
</div> </div>
<span className="text-sm font-semibold">Traditional Graph</span> <span className="text-sm font-semibold">Traditional Graph</span>
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight"> <span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
Graph database only Graph DB + LLM
</span> </span>
{queryMode === 'traditional' && ( {queryMode === 'traditional' && (
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div> <div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
@ -176,28 +176,20 @@ export function RagQuery({
<button <button
type="button" type="button"
onClick={() => handleQueryModeChange('vector-search')} onClick={() => handleQueryModeChange('vector-search')}
disabled={!vectorEnabled} disabled={true}
className={`relative flex flex-col items-center p-3 border rounded-lg transition-all duration-200 hover:shadow-md ${ title="GraphRAG requires a GNN model (not yet available)"
queryMode === 'vector-search' className="relative flex flex-col items-center p-3 border rounded-lg border-border/30 opacity-50 cursor-not-allowed"
? 'border-nvidia-green bg-nvidia-green/10 text-nvidia-green shadow-sm'
: vectorEnabled
? 'border-border/40 hover:border-border/60 hover:bg-muted/20'
: 'border-border/30 opacity-50 cursor-not-allowed'
}`}
> >
<div className="w-5 h-5 rounded-md bg-nvidia-green/15 flex items-center justify-center mb-1.5"> <div className="w-5 h-5 rounded-md bg-muted/15 flex items-center justify-center mb-1.5">
<Cpu className="h-2.5 w-2.5 text-nvidia-green" /> <Cpu className="h-2.5 w-2.5 text-muted-foreground" />
</div> </div>
<span className="text-sm font-semibold">GraphRAG</span> <span className="text-sm font-semibold text-muted-foreground">GraphRAG</span>
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight"> <span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
RAG + GNN RAG + GNN
</span> </span>
<div className="text-[9px] px-1.5 py-0.5 bg-nvidia-green/20 text-nvidia-green rounded mt-1 font-medium"> <div className="text-[9px] px-1.5 py-0.5 bg-amber-500/20 text-amber-700 dark:text-amber-500 rounded mt-1 font-medium">
NEW COMING SOON
</div> </div>
{queryMode === 'vector-search' && (
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
)}
</button> </button>
</div> </div>
</div> </div>

View File

@ -1,7 +1,7 @@
/** /**
* Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain * Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain
* This module provides a RetrievalQA chain using Qdrant as the vector store * This module provides a RetrievalQA chain using Qdrant as the vector store
* Note: xAI integration has been removed - needs alternative LLM provider implementation * Uses NVIDIA API for LLM inference
*/ */
import { ChatOpenAI } from "@langchain/openai"; import { ChatOpenAI } from "@langchain/openai";
@ -9,27 +9,42 @@ import { Document } from "@langchain/core/documents";
import { RunnableSequence } from "@langchain/core/runnables"; import { RunnableSequence } from "@langchain/core/runnables";
import { StringOutputParser } from "@langchain/core/output_parsers"; import { StringOutputParser } from "@langchain/core/output_parsers";
import { PromptTemplate } from "@langchain/core/prompts"; import { PromptTemplate } from "@langchain/core/prompts";
import { QdrantService, DocumentSearchResult } from './qdrant'; import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";
import { Embeddings } from "@langchain/core/embeddings";
import { EmbeddingsService } from './embeddings'; import { EmbeddingsService } from './embeddings';
// Interface for records to store in Qdrant // Custom embeddings adapter to use our EmbeddingsService with LangChain
interface QdrantRecord { class CustomEmbeddings extends Embeddings {
id: string; private embeddingsService: EmbeddingsService;
values: number[];
metadata?: Record<string, any>; constructor() {
super({});
this.embeddingsService = EmbeddingsService.getInstance();
}
async embedDocuments(documents: string[]): Promise<number[][]> {
await this.embeddingsService.initialize();
return await this.embeddingsService.encode(documents);
}
async embedQuery(query: string): Promise<number[]> {
await this.embeddingsService.initialize();
const embeddings = await this.embeddingsService.encode([query]);
return embeddings[0];
}
} }
export class RAGService { export class RAGService {
private static instance: RAGService; private static instance: RAGService;
private pineconeService: QdrantService; private vectorStore: QdrantVectorStore | null = null;
private embeddingsService: EmbeddingsService; private embeddingsService: CustomEmbeddings;
private llm: ChatOpenAI | null = null; private llm: ChatOpenAI | null = null;
private initialized: boolean = false; private initialized: boolean = false;
private isInitializing: boolean = false; private isInitializing: boolean = false;
private collectionName: string = 'entity-embeddings';
private constructor() { private constructor() {
this.pineconeService = QdrantService.getInstance(); this.embeddingsService = new CustomEmbeddings();
this.embeddingsService = EmbeddingsService.getInstance();
} }
/** /**
@ -55,24 +70,52 @@ export class RAGService {
try { try {
console.log('Initializing RAG service...'); console.log('Initializing RAG service...');
// Initialize dependencies // Check for NVIDIA API key
await this.pineconeService.initialize();
await this.embeddingsService.initialize();
// Initialize LLM - Try NVIDIA first, then fall back to error
const nvidiaApiKey = process.env.NVIDIA_API_KEY; const nvidiaApiKey = process.env.NVIDIA_API_KEY;
if (!nvidiaApiKey) { if (!nvidiaApiKey) {
throw new Error('RAG service requires NVIDIA_API_KEY to be set in environment variables. xAI integration has been removed.'); const error = new Error('NVIDIA_API_KEY is required for RAG service. Please set the NVIDIA_API_KEY environment variable.');
console.error('❌ RAG Initialization Error:', error.message);
throw error;
} }
// Note: This is a placeholder - NVIDIA LLM integration would need to be implemented // Initialize NVIDIA LLM using ChatOpenAI with NVIDIA's base URL
// For now, we'll throw an error to indicate RAG service is not available this.llm = new ChatOpenAI({
throw new Error('RAG service is temporarily unavailable after xAI removal. Please implement alternative LLM provider.'); modelName: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
temperature: 0.2,
maxTokens: 1024,
openAIApiKey: nvidiaApiKey,
configuration: {
baseURL: "https://integrate.api.nvidia.com/v1",
timeout: 120000, // 120 second timeout
},
modelKwargs: {
top_p: 0.95,
frequency_penalty: 0,
presence_penalty: 0
}
});
console.log('✅ NVIDIA LLM initialized successfully');
// Initialize Qdrant vector store
const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333';
this.vectorStore = await QdrantVectorStore.fromExistingCollection(
this.embeddingsService,
{
url: qdrantUrl,
collectionName: this.collectionName,
contentPayloadKey: 'text', // Map payload.text to pageContent
}
);
console.log('✅ Qdrant vector store connected successfully');
this.initialized = true; this.initialized = true;
console.log('RAG service initialized successfully'); console.log('RAG service initialized successfully');
} catch (error) { } catch (error) {
console.error('Error initializing RAG service:', error); console.error('❌ Error initializing RAG service:', error);
this.isInitializing = false;
throw error; throw error;
} finally { } finally {
this.isInitializing = false; this.isInitializing = false;
@ -80,7 +123,7 @@ export class RAGService {
} }
/** /**
* Store documents in Pinecone for retrieval * Store documents in Qdrant for retrieval
* @param documents Array of text documents to store * @param documents Array of text documents to store
* @param metadata Optional metadata for the documents * @param metadata Optional metadata for the documents
*/ */
@ -97,29 +140,28 @@ export class RAGService {
return; return;
} }
console.log(`Storing ${documents.length} documents in Pinecone`); if (!this.vectorStore) {
throw new Error('Vector store not initialized');
}
// Generate embeddings for documents console.log(`Storing ${documents.length} documents in Qdrant`);
const embeddings = await this.embeddingsService.encode(documents);
// Prepare records for Pinecone // Create Document objects with metadata
const records: PineconeRecord[] = embeddings.map((embedding, i) => ({ const docs = documents.map((text, i) => new Document({
id: `doc_${Date.now()}_${i}`, pageContent: text,
values: embedding,
metadata: { metadata: {
text: documents[i],
timestamp: new Date().toISOString(), timestamp: new Date().toISOString(),
...(metadata && metadata[i] ? metadata[i] : {}) ...(metadata && metadata[i] ? metadata[i] : {})
} }
})); }));
// Store in Pinecone // Store in Qdrant using LangChain
await this.pineconeService.upsertVectors(records); await this.vectorStore.addDocuments(docs);
console.log(`Successfully stored ${records.length} document embeddings`); console.log(`✅ Successfully stored ${docs.length} document embeddings`);
} }
/** /**
* Perform question answering with document retrieval * Perform question answering with document retrieval using proper RAG implementation
* @param query User query * @param query User query
* @param topK Number of most similar documents to retrieve * @param topK Number of most similar documents to retrieve
* @returns Answer generated from relevant context * @returns Answer generated from relevant context
@ -133,14 +175,17 @@ export class RAGService {
throw new Error('LLM not initialized'); throw new Error('LLM not initialized');
} }
// Generate embedding for query if (!this.vectorStore) {
const queryEmbedding = (await this.embeddingsService.encode([query]))[0]; throw new Error('Vector store not initialized');
}
// Retrieve similar documents from Pinecone console.log(`🔍 Performing RAG query with topK=${topK}`);
const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
// Use LangChain's similarity search to retrieve relevant documents
const similarDocs = await this.vectorStore.similaritySearch(query, topK);
if (!similarDocs || similarDocs.length === 0) { if (!similarDocs || similarDocs.length === 0) {
console.log('No relevant documents found, falling back to LLM'); console.log('⚠️ No relevant documents found, falling back to LLM general knowledge');
// Define prompt template for standalone LLM response // Define prompt template for standalone LLM response
const fallbackPromptTemplate = PromptTemplate.fromTemplate(` const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
@ -167,15 +212,64 @@ Answer:
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`; return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
} }
console.log(`✅ Found ${similarDocs.length} relevant documents`);
// Log first document structure for debugging
if (similarDocs.length > 0) {
console.log('📄 First document structure:', {
hasPageContent: !!similarDocs[0].pageContent,
pageContentLength: similarDocs[0].pageContent?.length || 0,
hasMetadata: !!similarDocs[0].metadata,
metadataKeys: similarDocs[0].metadata ? Object.keys(similarDocs[0].metadata) : []
});
}
// Extract text from retrieved documents // Extract text from retrieved documents
// Support both pageContent (LangChain standard) and metadata.text (legacy format)
const context = similarDocs const context = similarDocs
.map((doc: DocumentSearchResult) => doc.metadata?.text || '') .map((doc) => {
.filter((text: string) => text.length > 0) // Try pageContent first (LangChain standard)
if (doc.pageContent && doc.pageContent.trim().length > 0) {
return doc.pageContent;
}
// Fall back to metadata.text (legacy Qdrant storage format)
if (doc.metadata?.text && doc.metadata.text.trim().length > 0) {
return doc.metadata.text;
}
return '';
})
.filter((text) => text.length > 0)
.join('\n\n'); .join('\n\n');
// Define prompt template for QA console.log(`📝 Extracted context length: ${context.length} characters`);
if (!context || context.trim().length === 0) {
console.log('⚠️ Retrieved documents have no content, falling back to LLM');
const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
You are a helpful assistant answering questions based on your general knowledge.
Question: {query}
Answer:
`);
const fallbackChain = RunnableSequence.from([
{
query: () => query,
},
fallbackPromptTemplate,
this.llm,
new StringOutputParser(),
]);
const answer = await fallbackChain.invoke({});
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
}
// Define prompt template for RAG
const promptTemplate = PromptTemplate.fromTemplate(` const promptTemplate = PromptTemplate.fromTemplate(`
Answer the question based only on the following context: Answer the question based only on the following context from the knowledge base.
If you cannot find the answer in the context, say "I cannot find this information in the knowledge base."
Context: Context:
{context} {context}
@ -185,7 +279,7 @@ Question: {query}
Answer: Answer:
`); `);
// Create retrieval chain // Create retrieval chain using RunnableSequence
const retrievalChain = RunnableSequence.from([ const retrievalChain = RunnableSequence.from([
{ {
context: () => context, context: () => context,
@ -196,9 +290,18 @@ Answer:
new StringOutputParser(), new StringOutputParser(),
]); ]);
console.log('🤖 Generating answer with NVIDIA LLM...');
// Execute chain // Execute chain
try {
const answer = await retrievalChain.invoke({}); const answer = await retrievalChain.invoke({});
console.log('✅ RAG query completed successfully');
console.log(`📝 Answer length: ${answer.length} characters`);
return answer; return answer;
} catch (error) {
console.error('❌ Error generating answer with NVIDIA LLM:', error);
throw error;
}
} }
/** /**
@ -215,15 +318,16 @@ Answer:
await this.initialize(); await this.initialize();
} }
// Generate embedding for query if (!this.vectorStore) {
const queryEmbedding = (await this.embeddingsService.encode([query]))[0]; throw new Error('Vector store not initialized');
}
// Retrieve similar documents from Pinecone // Use LangChain's similarity search with scores
const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK); const results = await this.vectorStore.similaritySearchWithScore(query, topK);
return similarDocs.map((doc: DocumentSearchResult) => ({ return results.map(([doc, score]) => ({
text: doc.metadata?.text || '', text: doc.pageContent,
score: doc.score, score: score,
metadata: doc.metadata metadata: doc.metadata
})); }));
} }

View File

@ -19,6 +19,7 @@
"@langchain/community": "^0.3.40", "@langchain/community": "^0.3.40",
"@langchain/core": "^0.3.43", "@langchain/core": "^0.3.43",
"@langchain/openai": "^0.5.2", "@langchain/openai": "^0.5.2",
"@qdrant/js-client-rest": "^1.11.0",
"@radix-ui/react-alert-dialog": "^1.1.4", "@radix-ui/react-alert-dialog": "^1.1.4",
"@radix-ui/react-avatar": "^1.1.2", "@radix-ui/react-avatar": "^1.1.2",
"@radix-ui/react-checkbox": "^1.1.3", "@radix-ui/react-checkbox": "^1.1.3",