mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-22 18:13:52 +00:00
Improve Pure RAG UI and add query mode tracking
- Add query mode badge to answer section showing Pure RAG/Traditional Graph/GraphRAG
- Add collapsible reasoning section for <think> tags in answers
- Add markdown rendering support (bold/italic) in answers
- Fix Pure RAG to properly display answers using llmAnswer state
- Hide empty results message for Pure RAG mode
- Update metrics sidebar to show query times by mode instead of overall average
- Add queryTimesByMode field to metrics API and frontend interfaces
- Disable GraphRAG button with "COMING SOON" badge (requires GNN model)
- Fix Qdrant vector store document mapping with contentPayloadKey
- Update console logs to reflect Qdrant instead of Pinecone
- Add @qdrant/js-client-rest dependency to package.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
de9c46e97e
commit
8974ee9913
@ -66,11 +66,12 @@ export async function GET(request: NextRequest) {
|
||||
|
||||
// Get real query logs instead of mock data
|
||||
let queryLogs: QueryLogSummary[] = [];
|
||||
let precision = 0;
|
||||
let precision = 0;
|
||||
let recall = 0;
|
||||
let f1Score = 0;
|
||||
let avgQueryTime = vectorStats.avgQueryTime || 0;
|
||||
let avgRelevance = 0;
|
||||
let queryTimesByMode: Record<string, number> = {};
|
||||
|
||||
// Get query logs from file-based logger instead of Neo4j
|
||||
try {
|
||||
@ -87,25 +88,42 @@ export async function GET(request: NextRequest) {
|
||||
// Calculate metrics from the query logs
|
||||
if (queryLogs.length > 0) {
|
||||
// Calculate metrics from logs with actual data
|
||||
const logsWithMetrics = queryLogs.filter(log =>
|
||||
log.metrics.avgPrecision > 0 ||
|
||||
log.metrics.avgRecall > 0 ||
|
||||
const logsWithMetrics = queryLogs.filter(log =>
|
||||
log.metrics.avgPrecision > 0 ||
|
||||
log.metrics.avgRecall > 0 ||
|
||||
log.metrics.avgExecutionTimeMs > 0
|
||||
);
|
||||
|
||||
|
||||
const logsWithRelevance = queryLogs.filter(log => log.metrics.avgRelevanceScore > 0);
|
||||
|
||||
|
||||
if (logsWithMetrics.length > 0) {
|
||||
precision = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgPrecision || 0), 0) / logsWithMetrics.length;
|
||||
recall = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgRecall || 0), 0) / logsWithMetrics.length;
|
||||
avgQueryTime = logsWithMetrics.reduce((sum, log) => sum + (log.metrics.avgExecutionTimeMs || 0), 0) / logsWithMetrics.length;
|
||||
f1Score = precision > 0 && recall > 0 ? 2 * (precision * recall) / (precision + recall) : 0;
|
||||
}
|
||||
|
||||
|
||||
if (logsWithRelevance.length > 0) {
|
||||
avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate per-mode query times
|
||||
const logsByMode = queryLogs.reduce((acc, log) => {
|
||||
const mode = log.queryMode || 'traditional';
|
||||
if (!acc[mode]) acc[mode] = [];
|
||||
acc[mode].push(log);
|
||||
return acc;
|
||||
}, {} as Record<string, typeof queryLogs>);
|
||||
|
||||
Object.entries(logsByMode).forEach(([mode, logs]) => {
|
||||
const logsWithTime = logs.filter(log => log.metrics.avgExecutionTimeMs > 0);
|
||||
if (logsWithTime.length > 0) {
|
||||
queryTimesByMode[mode] = logsWithTime.reduce((sum, log) =>
|
||||
sum + log.metrics.avgExecutionTimeMs, 0
|
||||
) / logsWithTime.length;
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
console.warn('Error getting query logs from file:', error);
|
||||
// Keep values at 0 instead of using defaults
|
||||
@ -132,6 +150,7 @@ export async function GET(request: NextRequest) {
|
||||
recall,
|
||||
f1Score,
|
||||
topQueries,
|
||||
queryTimesByMode: queryTimesByMode || {},
|
||||
// Add metadata about query logs
|
||||
queryLogStats: {
|
||||
totalQueryLogs: queryLogs.length,
|
||||
|
||||
@ -17,6 +17,7 @@ interface MetricsData {
|
||||
recall: number;
|
||||
f1Score: number;
|
||||
topQueries: { query: string; count: number }[];
|
||||
queryTimesByMode?: Record<string, number>;
|
||||
queryLogStats?: {
|
||||
totalQueryLogs: number;
|
||||
totalExecutions: number;
|
||||
|
||||
@ -21,6 +21,7 @@ export default function RagPage() {
|
||||
avgRelevance: number;
|
||||
precision: number;
|
||||
recall: number;
|
||||
queryTimesByMode?: Record<string, number>;
|
||||
} | null>(null);
|
||||
const [currentParams, setCurrentParams] = useState<RagParams>({
|
||||
kNeighbors: 4096,
|
||||
@ -65,7 +66,8 @@ export default function RagPage() {
|
||||
avgQueryTime: data.avgQueryTime,
|
||||
avgRelevance: data.avgRelevance,
|
||||
precision: data.precision,
|
||||
recall: data.recall
|
||||
recall: data.recall,
|
||||
queryTimesByMode: data.queryTimesByMode
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
@ -98,7 +100,7 @@ export default function RagPage() {
|
||||
if (params.usePureRag) {
|
||||
queryMode = 'pure-rag';
|
||||
try {
|
||||
console.log('Using pure RAG with just Pinecone and LangChain for query:', query);
|
||||
console.log('Using pure RAG with Qdrant and NVIDIA LLM for query:', query);
|
||||
const ragResponse = await fetch('/api/rag-query', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@ -110,16 +112,17 @@ export default function RagPage() {
|
||||
|
||||
if (ragResponse.ok) {
|
||||
const data = await ragResponse.json();
|
||||
console.log('📥 RAG Response data:', { hasAnswer: !!data.answer, answerLength: data.answer?.length });
|
||||
// Handle the answer - we might need to display differently than triples
|
||||
if (data.answer) {
|
||||
// Special UI handling for text answer rather than triples
|
||||
setResults([{
|
||||
subject: 'Answer',
|
||||
predicate: '',
|
||||
object: data.answer,
|
||||
usedFallback: data.usedFallback
|
||||
}]);
|
||||
|
||||
console.log('✅ Setting answer in results:', data.answer.substring(0, 100) + '...');
|
||||
|
||||
// Set the LLM answer for display (same as traditional mode)
|
||||
setLlmAnswer(data.answer);
|
||||
|
||||
// Set empty results array since Pure RAG doesn't return triples
|
||||
setResults([]);
|
||||
|
||||
resultCount = 1;
|
||||
relevanceScore = data.relevanceScore || 0;
|
||||
|
||||
@ -364,22 +367,34 @@ export default function RagPage() {
|
||||
</div>
|
||||
|
||||
<div className="space-y-3 text-sm">
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Avg. Query Time:</span>
|
||||
<span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Relevance Score:</span>
|
||||
<span className="font-medium">{metrics.avgRelevance > 0 ? `${(metrics.avgRelevance * 100).toFixed(1)}%` : "No data"}</span>
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Precision:</span>
|
||||
<span className="font-medium">{metrics.precision > 0 ? `${(metrics.precision * 100).toFixed(1)}%` : "No data"}</span>
|
||||
</div>
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Recall:</span>
|
||||
<span className="font-medium">{metrics.recall > 0 ? `${(metrics.recall * 100).toFixed(1)}%` : "No data"}</span>
|
||||
</div>
|
||||
{/* Query times by mode */}
|
||||
{metrics.queryTimesByMode && Object.keys(metrics.queryTimesByMode).length > 0 ? (
|
||||
<>
|
||||
{metrics.queryTimesByMode['pure-rag'] !== undefined && (
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Pure RAG:</span>
|
||||
<span className="font-medium">{metrics.queryTimesByMode['pure-rag'].toFixed(2)}ms</span>
|
||||
</div>
|
||||
)}
|
||||
{metrics.queryTimesByMode['traditional'] !== undefined && (
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Traditional Graph:</span>
|
||||
<span className="font-medium">{metrics.queryTimesByMode['traditional'].toFixed(2)}ms</span>
|
||||
</div>
|
||||
)}
|
||||
{metrics.queryTimesByMode['vector-search'] !== undefined && (
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">GraphRAG:</span>
|
||||
<span className="font-medium">{metrics.queryTimesByMode['vector-search'].toFixed(2)}ms</span>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
) : (
|
||||
<div className="flex justify-between">
|
||||
<span className="text-muted-foreground">Avg. Query Time:</span>
|
||||
<span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
@ -403,24 +418,64 @@ export default function RagPage() {
|
||||
<SearchIcon className="h-3 w-3 text-nvidia-green" />
|
||||
</div>
|
||||
<h3 className="text-lg font-semibold text-foreground">Answer</h3>
|
||||
{currentParams.queryMode && (
|
||||
<span className="text-xs px-2.5 py-1 rounded-full font-medium bg-nvidia-green/10 text-nvidia-green border border-nvidia-green/20">
|
||||
{currentParams.queryMode === 'pure-rag' ? 'Pure RAG' :
|
||||
currentParams.queryMode === 'vector-search' ? 'GraphRAG' :
|
||||
'Traditional Graph'}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
<div className="prose prose-sm dark:prose-invert max-w-none">
|
||||
<div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
|
||||
<p className="text-foreground leading-relaxed whitespace-pre-wrap">{llmAnswer}</p>
|
||||
</div>
|
||||
{(() => {
|
||||
// Parse <think> tags
|
||||
const thinkMatch = llmAnswer.match(/<think>([\s\S]*?)<\/think>/);
|
||||
const thinkContent = thinkMatch ? thinkMatch[1].trim() : null;
|
||||
const mainAnswer = thinkContent
|
||||
? llmAnswer.replace(/<think>[\s\S]*?<\/think>/, '').trim()
|
||||
: llmAnswer;
|
||||
|
||||
return (
|
||||
<>
|
||||
{thinkContent && (
|
||||
<details className="mb-4 bg-muted/10 border border-border/20 rounded-xl overflow-hidden">
|
||||
<summary className="cursor-pointer p-4 hover:bg-muted/20 transition-colors flex items-center gap-2">
|
||||
<svg className="w-4 h-4 transform transition-transform" style={{ transform: 'rotate(0deg)' }} xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||
</svg>
|
||||
<span className="text-sm font-medium text-muted-foreground">Reasoning Process</span>
|
||||
</summary>
|
||||
<div className="p-4 pt-0 text-sm text-muted-foreground leading-relaxed whitespace-pre-wrap border-t border-border/10">
|
||||
{thinkContent}
|
||||
</div>
|
||||
</details>
|
||||
)}
|
||||
<div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
|
||||
<div
|
||||
className="text-foreground leading-relaxed whitespace-pre-wrap"
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: mainAnswer
|
||||
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
|
||||
.replace(/\*(.*?)\*/g, '<em>$1</em>')
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
})()}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Results Section */}
|
||||
{results && results.length > 0 && (
|
||||
{results && results.length > 0 && !currentParams.usePureRag && (
|
||||
<div className="mt-8 nvidia-build-card">
|
||||
<div className="flex items-center gap-3 mb-6">
|
||||
<div className="w-6 h-6 rounded-md bg-nvidia-green/15 flex items-center justify-center">
|
||||
<SearchIcon className="h-3 w-3 text-nvidia-green" />
|
||||
</div>
|
||||
<h3 className="text-lg font-semibold text-foreground">
|
||||
{llmAnswer ? `Supporting Triples (${results.length})` : `Results (${results.length})`}
|
||||
{llmAnswer ? `Retrieved Knowledge (${results.length})` : `Results (${results.length})`}
|
||||
</h3>
|
||||
</div>
|
||||
<div className="space-y-4">
|
||||
@ -464,7 +519,7 @@ export default function RagPage() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{results && results.length === 0 && !isLoading && (
|
||||
{results && results.length === 0 && !isLoading && !currentParams.usePureRag && (
|
||||
<div className="mt-8 nvidia-build-card border-dashed">
|
||||
<div className="text-center py-8">
|
||||
<div className="w-12 h-12 rounded-xl bg-muted/30 flex items-center justify-center mx-auto mb-4">
|
||||
|
||||
@ -145,7 +145,7 @@ export function RagQuery({
|
||||
</div>
|
||||
<span className="text-sm font-semibold">Pure RAG</span>
|
||||
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
||||
Pinecone + LangChain
|
||||
Vector DB + LLM
|
||||
</span>
|
||||
{queryMode === 'pure-rag' && (
|
||||
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
||||
@ -166,7 +166,7 @@ export function RagQuery({
|
||||
</div>
|
||||
<span className="text-sm font-semibold">Traditional Graph</span>
|
||||
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
||||
Graph database only
|
||||
Graph DB + LLM
|
||||
</span>
|
||||
{queryMode === 'traditional' && (
|
||||
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
||||
@ -176,28 +176,20 @@ export function RagQuery({
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => handleQueryModeChange('vector-search')}
|
||||
disabled={!vectorEnabled}
|
||||
className={`relative flex flex-col items-center p-3 border rounded-lg transition-all duration-200 hover:shadow-md ${
|
||||
queryMode === 'vector-search'
|
||||
? 'border-nvidia-green bg-nvidia-green/10 text-nvidia-green shadow-sm'
|
||||
: vectorEnabled
|
||||
? 'border-border/40 hover:border-border/60 hover:bg-muted/20'
|
||||
: 'border-border/30 opacity-50 cursor-not-allowed'
|
||||
}`}
|
||||
disabled={true}
|
||||
title="GraphRAG requires a GNN model (not yet available)"
|
||||
className="relative flex flex-col items-center p-3 border rounded-lg border-border/30 opacity-50 cursor-not-allowed"
|
||||
>
|
||||
<div className="w-5 h-5 rounded-md bg-nvidia-green/15 flex items-center justify-center mb-1.5">
|
||||
<Cpu className="h-2.5 w-2.5 text-nvidia-green" />
|
||||
<div className="w-5 h-5 rounded-md bg-muted/15 flex items-center justify-center mb-1.5">
|
||||
<Cpu className="h-2.5 w-2.5 text-muted-foreground" />
|
||||
</div>
|
||||
<span className="text-sm font-semibold">GraphRAG</span>
|
||||
<span className="text-sm font-semibold text-muted-foreground">GraphRAG</span>
|
||||
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
||||
RAG + GNN
|
||||
</span>
|
||||
<div className="text-[9px] px-1.5 py-0.5 bg-nvidia-green/20 text-nvidia-green rounded mt-1 font-medium">
|
||||
NEW
|
||||
<div className="text-[9px] px-1.5 py-0.5 bg-amber-500/20 text-amber-700 dark:text-amber-500 rounded mt-1 font-medium">
|
||||
COMING SOON
|
||||
</div>
|
||||
{queryMode === 'vector-search' && (
|
||||
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain
|
||||
* This module provides a RetrievalQA chain using Qdrant as the vector store
|
||||
* Note: xAI integration has been removed - needs alternative LLM provider implementation
|
||||
* Uses NVIDIA API for LLM inference
|
||||
*/
|
||||
|
||||
import { ChatOpenAI } from "@langchain/openai";
|
||||
@ -9,27 +9,42 @@ import { Document } from "@langchain/core/documents";
|
||||
import { RunnableSequence } from "@langchain/core/runnables";
|
||||
import { StringOutputParser } from "@langchain/core/output_parsers";
|
||||
import { PromptTemplate } from "@langchain/core/prompts";
|
||||
import { QdrantService, DocumentSearchResult } from './qdrant';
|
||||
import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";
|
||||
import { Embeddings } from "@langchain/core/embeddings";
|
||||
import { EmbeddingsService } from './embeddings';
|
||||
|
||||
// Interface for records to store in Qdrant
|
||||
interface QdrantRecord {
|
||||
id: string;
|
||||
values: number[];
|
||||
metadata?: Record<string, any>;
|
||||
// Custom embeddings adapter to use our EmbeddingsService with LangChain.
// Extends LangChain's Embeddings base class and forwards both embedding
// entry points (embedDocuments / embedQuery) to EmbeddingsService.encode().
|
||||
class CustomEmbeddings extends Embeddings {
|
||||
// Service instance that actually produces the vectors.
private embeddingsService: EmbeddingsService;
|
||||
|
||||
// Passes an empty params object to the Embeddings base constructor and
// stores the instance returned by EmbeddingsService.getInstance().
constructor() {
|
||||
super({});
|
||||
this.embeddingsService = EmbeddingsService.getInstance();
|
||||
}
|
||||
|
||||
// Embeds a batch of texts.
// @param documents Texts to embed.
// @returns Whatever encode() resolves to for the batch
//          (presumably one vector per input text — confirm against EmbeddingsService).
async embedDocuments(documents: string[]): Promise<number[][]> {
|
||||
// initialize() is awaited on every call before encoding.
await this.embeddingsService.initialize();
|
||||
return await this.embeddingsService.encode(documents);
|
||||
}
|
||||
|
||||
// Embeds a single query string.
// @param query Text to embed.
// @returns The first element of the encode() result for [query].
async embedQuery(query: string): Promise<number[]> {
|
||||
await this.embeddingsService.initialize();
|
||||
// encode() takes an array, so the query is wrapped in a one-element array.
const embeddings = await this.embeddingsService.encode([query]);
|
||||
// NOTE(review): this is undefined if encode() ever resolves to an empty array — confirm it cannot.
return embeddings[0];
|
||||
}
|
||||
}
|
||||
|
||||
export class RAGService {
|
||||
private static instance: RAGService;
|
||||
private pineconeService: QdrantService;
|
||||
private embeddingsService: EmbeddingsService;
|
||||
private vectorStore: QdrantVectorStore | null = null;
|
||||
private embeddingsService: CustomEmbeddings;
|
||||
private llm: ChatOpenAI | null = null;
|
||||
private initialized: boolean = false;
|
||||
private isInitializing: boolean = false;
|
||||
private collectionName: string = 'entity-embeddings';
|
||||
|
||||
private constructor() {
|
||||
this.pineconeService = QdrantService.getInstance();
|
||||
this.embeddingsService = EmbeddingsService.getInstance();
|
||||
this.embeddingsService = new CustomEmbeddings();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -51,28 +66,56 @@ export class RAGService {
|
||||
}
|
||||
|
||||
this.isInitializing = true;
|
||||
|
||||
|
||||
try {
|
||||
console.log('Initializing RAG service...');
|
||||
|
||||
// Initialize dependencies
|
||||
await this.pineconeService.initialize();
|
||||
await this.embeddingsService.initialize();
|
||||
|
||||
// Initialize LLM - Try NVIDIA first, then fall back to error
|
||||
// Check for NVIDIA API key
|
||||
const nvidiaApiKey = process.env.NVIDIA_API_KEY;
|
||||
if (!nvidiaApiKey) {
|
||||
throw new Error('RAG service requires NVIDIA_API_KEY to be set in environment variables. xAI integration has been removed.');
|
||||
const error = new Error('NVIDIA_API_KEY is required for RAG service. Please set the NVIDIA_API_KEY environment variable.');
|
||||
console.error('❌ RAG Initialization Error:', error.message);
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Note: This is a placeholder - NVIDIA LLM integration would need to be implemented
|
||||
// For now, we'll throw an error to indicate RAG service is not available
|
||||
throw new Error('RAG service is temporarily unavailable after xAI removal. Please implement alternative LLM provider.');
|
||||
|
||||
// Initialize NVIDIA LLM using ChatOpenAI with NVIDIA's base URL
|
||||
this.llm = new ChatOpenAI({
|
||||
modelName: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
|
||||
temperature: 0.2,
|
||||
maxTokens: 1024,
|
||||
openAIApiKey: nvidiaApiKey,
|
||||
configuration: {
|
||||
baseURL: "https://integrate.api.nvidia.com/v1",
|
||||
timeout: 120000, // 120 second timeout
|
||||
},
|
||||
modelKwargs: {
|
||||
top_p: 0.95,
|
||||
frequency_penalty: 0,
|
||||
presence_penalty: 0
|
||||
}
|
||||
});
|
||||
|
||||
console.log('✅ NVIDIA LLM initialized successfully');
|
||||
|
||||
// Initialize Qdrant vector store
|
||||
const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333';
|
||||
|
||||
this.vectorStore = await QdrantVectorStore.fromExistingCollection(
|
||||
this.embeddingsService,
|
||||
{
|
||||
url: qdrantUrl,
|
||||
collectionName: this.collectionName,
|
||||
contentPayloadKey: 'text', // Map payload.text to pageContent
|
||||
}
|
||||
);
|
||||
|
||||
console.log('✅ Qdrant vector store connected successfully');
|
||||
|
||||
this.initialized = true;
|
||||
console.log('RAG service initialized successfully');
|
||||
console.log('✅ RAG service initialized successfully');
|
||||
} catch (error) {
|
||||
console.error('Error initializing RAG service:', error);
|
||||
console.error('❌ Error initializing RAG service:', error);
|
||||
this.isInitializing = false;
|
||||
throw error;
|
||||
} finally {
|
||||
this.isInitializing = false;
|
||||
@ -80,7 +123,7 @@ export class RAGService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Store documents in Pinecone for retrieval
|
||||
* Store documents in Qdrant for retrieval
|
||||
* @param documents Array of text documents to store
|
||||
* @param metadata Optional metadata for the documents
|
||||
*/
|
||||
@ -97,29 +140,28 @@ export class RAGService {
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Storing ${documents.length} documents in Pinecone`);
|
||||
if (!this.vectorStore) {
|
||||
throw new Error('Vector store not initialized');
|
||||
}
|
||||
|
||||
// Generate embeddings for documents
|
||||
const embeddings = await this.embeddingsService.encode(documents);
|
||||
console.log(`Storing ${documents.length} documents in Qdrant`);
|
||||
|
||||
// Prepare records for Pinecone
|
||||
const records: PineconeRecord[] = embeddings.map((embedding, i) => ({
|
||||
id: `doc_${Date.now()}_${i}`,
|
||||
values: embedding,
|
||||
// Create Document objects with metadata
|
||||
const docs = documents.map((text, i) => new Document({
|
||||
pageContent: text,
|
||||
metadata: {
|
||||
text: documents[i],
|
||||
timestamp: new Date().toISOString(),
|
||||
...(metadata && metadata[i] ? metadata[i] : {})
|
||||
}
|
||||
}));
|
||||
|
||||
// Store in Pinecone
|
||||
await this.pineconeService.upsertVectors(records);
|
||||
console.log(`Successfully stored ${records.length} document embeddings`);
|
||||
// Store in Qdrant using LangChain
|
||||
await this.vectorStore.addDocuments(docs);
|
||||
console.log(`✅ Successfully stored ${docs.length} document embeddings`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform question answering with document retrieval
|
||||
* Perform question answering with document retrieval using proper RAG implementation
|
||||
* @param query User query
|
||||
* @param topK Number of most similar documents to retrieve
|
||||
* @returns Answer generated from relevant context
|
||||
@ -133,15 +175,18 @@ export class RAGService {
|
||||
throw new Error('LLM not initialized');
|
||||
}
|
||||
|
||||
// Generate embedding for query
|
||||
const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
|
||||
if (!this.vectorStore) {
|
||||
throw new Error('Vector store not initialized');
|
||||
}
|
||||
|
||||
console.log(`🔍 Performing RAG query with topK=${topK}`);
|
||||
|
||||
// Use LangChain's similarity search to retrieve relevant documents
|
||||
const similarDocs = await this.vectorStore.similaritySearch(query, topK);
|
||||
|
||||
// Retrieve similar documents from Pinecone
|
||||
const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
|
||||
|
||||
if (!similarDocs || similarDocs.length === 0) {
|
||||
console.log('No relevant documents found, falling back to LLM');
|
||||
|
||||
console.log('⚠️ No relevant documents found, falling back to LLM general knowledge');
|
||||
|
||||
// Define prompt template for standalone LLM response
|
||||
const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
|
||||
You are a helpful assistant answering questions based on your general knowledge.
|
||||
@ -167,15 +212,64 @@ Answer:
|
||||
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
|
||||
}
|
||||
|
||||
console.log(`✅ Found ${similarDocs.length} relevant documents`);
|
||||
|
||||
// Log first document structure for debugging
|
||||
if (similarDocs.length > 0) {
|
||||
console.log('📄 First document structure:', {
|
||||
hasPageContent: !!similarDocs[0].pageContent,
|
||||
pageContentLength: similarDocs[0].pageContent?.length || 0,
|
||||
hasMetadata: !!similarDocs[0].metadata,
|
||||
metadataKeys: similarDocs[0].metadata ? Object.keys(similarDocs[0].metadata) : []
|
||||
});
|
||||
}
|
||||
|
||||
// Extract text from retrieved documents
|
||||
// Support both pageContent (LangChain standard) and metadata.text (legacy format)
|
||||
const context = similarDocs
|
||||
.map((doc: DocumentSearchResult) => doc.metadata?.text || '')
|
||||
.filter((text: string) => text.length > 0)
|
||||
.map((doc) => {
|
||||
// Try pageContent first (LangChain standard)
|
||||
if (doc.pageContent && doc.pageContent.trim().length > 0) {
|
||||
return doc.pageContent;
|
||||
}
|
||||
// Fall back to metadata.text (legacy Qdrant storage format)
|
||||
if (doc.metadata?.text && doc.metadata.text.trim().length > 0) {
|
||||
return doc.metadata.text;
|
||||
}
|
||||
return '';
|
||||
})
|
||||
.filter((text) => text.length > 0)
|
||||
.join('\n\n');
|
||||
|
||||
// Define prompt template for QA
|
||||
console.log(`📝 Extracted context length: ${context.length} characters`);
|
||||
|
||||
if (!context || context.trim().length === 0) {
|
||||
console.log('⚠️ Retrieved documents have no content, falling back to LLM');
|
||||
const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
|
||||
You are a helpful assistant answering questions based on your general knowledge.
|
||||
|
||||
Question: {query}
|
||||
|
||||
Answer:
|
||||
`);
|
||||
|
||||
const fallbackChain = RunnableSequence.from([
|
||||
{
|
||||
query: () => query,
|
||||
},
|
||||
fallbackPromptTemplate,
|
||||
this.llm,
|
||||
new StringOutputParser(),
|
||||
]);
|
||||
|
||||
const answer = await fallbackChain.invoke({});
|
||||
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
|
||||
}
|
||||
|
||||
// Define prompt template for RAG
|
||||
const promptTemplate = PromptTemplate.fromTemplate(`
|
||||
Answer the question based only on the following context:
|
||||
Answer the question based only on the following context from the knowledge base.
|
||||
If you cannot find the answer in the context, say "I cannot find this information in the knowledge base."
|
||||
|
||||
Context:
|
||||
{context}
|
||||
@ -185,7 +279,7 @@ Question: {query}
|
||||
Answer:
|
||||
`);
|
||||
|
||||
// Create retrieval chain
|
||||
// Create retrieval chain using RunnableSequence
|
||||
const retrievalChain = RunnableSequence.from([
|
||||
{
|
||||
context: () => context,
|
||||
@ -196,9 +290,18 @@ Answer:
|
||||
new StringOutputParser(),
|
||||
]);
|
||||
|
||||
console.log('🤖 Generating answer with NVIDIA LLM...');
|
||||
|
||||
// Execute chain
|
||||
const answer = await retrievalChain.invoke({});
|
||||
return answer;
|
||||
try {
|
||||
const answer = await retrievalChain.invoke({});
|
||||
console.log('✅ RAG query completed successfully');
|
||||
console.log(`📝 Answer length: ${answer.length} characters`);
|
||||
return answer;
|
||||
} catch (error) {
|
||||
console.error('❌ Error generating answer with NVIDIA LLM:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -215,15 +318,16 @@ Answer:
|
||||
await this.initialize();
|
||||
}
|
||||
|
||||
// Generate embedding for query
|
||||
const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
|
||||
if (!this.vectorStore) {
|
||||
throw new Error('Vector store not initialized');
|
||||
}
|
||||
|
||||
// Retrieve similar documents from Pinecone
|
||||
const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
|
||||
|
||||
return similarDocs.map((doc: DocumentSearchResult) => ({
|
||||
text: doc.metadata?.text || '',
|
||||
score: doc.score,
|
||||
// Use LangChain's similarity search with scores
|
||||
const results = await this.vectorStore.similaritySearchWithScore(query, topK);
|
||||
|
||||
return results.map(([doc, score]) => ({
|
||||
text: doc.pageContent,
|
||||
score: score,
|
||||
metadata: doc.metadata
|
||||
}));
|
||||
}
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
"@langchain/community": "^0.3.40",
|
||||
"@langchain/core": "^0.3.43",
|
||||
"@langchain/openai": "^0.5.2",
|
||||
"@qdrant/js-client-rest": "^1.11.0",
|
||||
"@radix-ui/react-alert-dialog": "^1.1.4",
|
||||
"@radix-ui/react-avatar": "^1.1.2",
|
||||
"@radix-ui/react-checkbox": "^1.1.3",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user