mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 18:33:54 +00:00
Improve Pure RAG UI and add query mode tracking
- Add query mode badge to answer section showing Pure RAG/Traditional Graph/GraphRAG
- Add collapsible reasoning section for <think> tags in answers
- Add markdown rendering support (bold/italic) in answers
- Fix Pure RAG to properly display answers using llmAnswer state
- Hide empty results message for Pure RAG mode
- Update metrics sidebar to show query times by mode instead of overall average
- Add queryTimesByMode field to metrics API and frontend interfaces
- Disable GraphRAG button with "COMING SOON" badge (requires GNN model)
- Fix Qdrant vector store document mapping with contentPayloadKey
- Update console logs to reflect Qdrant instead of Pinecone
- Add @qdrant/js-client-rest dependency to package.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
de9c46e97e
commit
8974ee9913
@ -71,6 +71,7 @@ export async function GET(request: NextRequest) {
|
|||||||
let f1Score = 0;
|
let f1Score = 0;
|
||||||
let avgQueryTime = vectorStats.avgQueryTime || 0;
|
let avgQueryTime = vectorStats.avgQueryTime || 0;
|
||||||
let avgRelevance = 0;
|
let avgRelevance = 0;
|
||||||
|
let queryTimesByMode: Record<string, number> = {};
|
||||||
|
|
||||||
// Get query logs from file-based logger instead of Neo4j
|
// Get query logs from file-based logger instead of Neo4j
|
||||||
try {
|
try {
|
||||||
@ -106,6 +107,23 @@ export async function GET(request: NextRequest) {
|
|||||||
avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length;
|
avgRelevance = logsWithRelevance.reduce((sum, log) => sum + (log.metrics.avgRelevanceScore || 0), 0) / logsWithRelevance.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate per-mode query times
|
||||||
|
const logsByMode = queryLogs.reduce((acc, log) => {
|
||||||
|
const mode = log.queryMode || 'traditional';
|
||||||
|
if (!acc[mode]) acc[mode] = [];
|
||||||
|
acc[mode].push(log);
|
||||||
|
return acc;
|
||||||
|
}, {} as Record<string, typeof queryLogs>);
|
||||||
|
|
||||||
|
Object.entries(logsByMode).forEach(([mode, logs]) => {
|
||||||
|
const logsWithTime = logs.filter(log => log.metrics.avgExecutionTimeMs > 0);
|
||||||
|
if (logsWithTime.length > 0) {
|
||||||
|
queryTimesByMode[mode] = logsWithTime.reduce((sum, log) =>
|
||||||
|
sum + log.metrics.avgExecutionTimeMs, 0
|
||||||
|
) / logsWithTime.length;
|
||||||
|
}
|
||||||
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn('Error getting query logs from file:', error);
|
console.warn('Error getting query logs from file:', error);
|
||||||
// Keep values at 0 instead of using defaults
|
// Keep values at 0 instead of using defaults
|
||||||
@ -132,6 +150,7 @@ export async function GET(request: NextRequest) {
|
|||||||
recall,
|
recall,
|
||||||
f1Score,
|
f1Score,
|
||||||
topQueries,
|
topQueries,
|
||||||
|
queryTimesByMode: queryTimesByMode || {},
|
||||||
// Add metadata about query logs
|
// Add metadata about query logs
|
||||||
queryLogStats: {
|
queryLogStats: {
|
||||||
totalQueryLogs: queryLogs.length,
|
totalQueryLogs: queryLogs.length,
|
||||||
|
|||||||
@ -17,6 +17,7 @@ interface MetricsData {
|
|||||||
recall: number;
|
recall: number;
|
||||||
f1Score: number;
|
f1Score: number;
|
||||||
topQueries: { query: string; count: number }[];
|
topQueries: { query: string; count: number }[];
|
||||||
|
queryTimesByMode?: Record<string, number>;
|
||||||
queryLogStats?: {
|
queryLogStats?: {
|
||||||
totalQueryLogs: number;
|
totalQueryLogs: number;
|
||||||
totalExecutions: number;
|
totalExecutions: number;
|
||||||
|
|||||||
@ -21,6 +21,7 @@ export default function RagPage() {
|
|||||||
avgRelevance: number;
|
avgRelevance: number;
|
||||||
precision: number;
|
precision: number;
|
||||||
recall: number;
|
recall: number;
|
||||||
|
queryTimesByMode?: Record<string, number>;
|
||||||
} | null>(null);
|
} | null>(null);
|
||||||
const [currentParams, setCurrentParams] = useState<RagParams>({
|
const [currentParams, setCurrentParams] = useState<RagParams>({
|
||||||
kNeighbors: 4096,
|
kNeighbors: 4096,
|
||||||
@ -65,7 +66,8 @@ export default function RagPage() {
|
|||||||
avgQueryTime: data.avgQueryTime,
|
avgQueryTime: data.avgQueryTime,
|
||||||
avgRelevance: data.avgRelevance,
|
avgRelevance: data.avgRelevance,
|
||||||
precision: data.precision,
|
precision: data.precision,
|
||||||
recall: data.recall
|
recall: data.recall,
|
||||||
|
queryTimesByMode: data.queryTimesByMode
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@ -98,7 +100,7 @@ export default function RagPage() {
|
|||||||
if (params.usePureRag) {
|
if (params.usePureRag) {
|
||||||
queryMode = 'pure-rag';
|
queryMode = 'pure-rag';
|
||||||
try {
|
try {
|
||||||
console.log('Using pure RAG with just Pinecone and LangChain for query:', query);
|
console.log('Using pure RAG with Qdrant and NVIDIA LLM for query:', query);
|
||||||
const ragResponse = await fetch('/api/rag-query', {
|
const ragResponse = await fetch('/api/rag-query', {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
@ -110,15 +112,16 @@ export default function RagPage() {
|
|||||||
|
|
||||||
if (ragResponse.ok) {
|
if (ragResponse.ok) {
|
||||||
const data = await ragResponse.json();
|
const data = await ragResponse.json();
|
||||||
|
console.log('📥 RAG Response data:', { hasAnswer: !!data.answer, answerLength: data.answer?.length });
|
||||||
// Handle the answer - we might need to display differently than triples
|
// Handle the answer - we might need to display differently than triples
|
||||||
if (data.answer) {
|
if (data.answer) {
|
||||||
// Special UI handling for text answer rather than triples
|
console.log('✅ Setting answer in results:', data.answer.substring(0, 100) + '...');
|
||||||
setResults([{
|
|
||||||
subject: 'Answer',
|
// Set the LLM answer for display (same as traditional mode)
|
||||||
predicate: '',
|
setLlmAnswer(data.answer);
|
||||||
object: data.answer,
|
|
||||||
usedFallback: data.usedFallback
|
// Set empty results array since Pure RAG doesn't return triples
|
||||||
}]);
|
setResults([]);
|
||||||
|
|
||||||
resultCount = 1;
|
resultCount = 1;
|
||||||
relevanceScore = data.relevanceScore || 0;
|
relevanceScore = data.relevanceScore || 0;
|
||||||
@ -364,22 +367,34 @@ export default function RagPage() {
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="space-y-3 text-sm">
|
<div className="space-y-3 text-sm">
|
||||||
|
{/* Query times by mode */}
|
||||||
|
{metrics.queryTimesByMode && Object.keys(metrics.queryTimesByMode).length > 0 ? (
|
||||||
|
<>
|
||||||
|
{metrics.queryTimesByMode['pure-rag'] !== undefined && (
|
||||||
|
<div className="flex justify-between">
|
||||||
|
<span className="text-muted-foreground">Pure RAG:</span>
|
||||||
|
<span className="font-medium">{metrics.queryTimesByMode['pure-rag'].toFixed(2)}ms</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{metrics.queryTimesByMode['traditional'] !== undefined && (
|
||||||
|
<div className="flex justify-between">
|
||||||
|
<span className="text-muted-foreground">Traditional Graph:</span>
|
||||||
|
<span className="font-medium">{metrics.queryTimesByMode['traditional'].toFixed(2)}ms</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
{metrics.queryTimesByMode['vector-search'] !== undefined && (
|
||||||
|
<div className="flex justify-between">
|
||||||
|
<span className="text-muted-foreground">GraphRAG:</span>
|
||||||
|
<span className="font-medium">{metrics.queryTimesByMode['vector-search'].toFixed(2)}ms</span>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
<div className="flex justify-between">
|
<div className="flex justify-between">
|
||||||
<span className="text-muted-foreground">Avg. Query Time:</span>
|
<span className="text-muted-foreground">Avg. Query Time:</span>
|
||||||
<span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
|
<span className="font-medium">{metrics.avgQueryTime > 0 ? `${metrics.avgQueryTime.toFixed(2)}ms` : "No data"}</span>
|
||||||
</div>
|
</div>
|
||||||
<div className="flex justify-between">
|
)}
|
||||||
<span className="text-muted-foreground">Relevance Score:</span>
|
|
||||||
<span className="font-medium">{metrics.avgRelevance > 0 ? `${(metrics.avgRelevance * 100).toFixed(1)}%` : "No data"}</span>
|
|
||||||
</div>
|
|
||||||
<div className="flex justify-between">
|
|
||||||
<span className="text-muted-foreground">Precision:</span>
|
|
||||||
<span className="font-medium">{metrics.precision > 0 ? `${(metrics.precision * 100).toFixed(1)}%` : "No data"}</span>
|
|
||||||
</div>
|
|
||||||
<div className="flex justify-between">
|
|
||||||
<span className="text-muted-foreground">Recall:</span>
|
|
||||||
<span className="font-medium">{metrics.recall > 0 ? `${(metrics.recall * 100).toFixed(1)}%` : "No data"}</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
@ -403,24 +418,64 @@ export default function RagPage() {
|
|||||||
<SearchIcon className="h-3 w-3 text-nvidia-green" />
|
<SearchIcon className="h-3 w-3 text-nvidia-green" />
|
||||||
</div>
|
</div>
|
||||||
<h3 className="text-lg font-semibold text-foreground">Answer</h3>
|
<h3 className="text-lg font-semibold text-foreground">Answer</h3>
|
||||||
|
{currentParams.queryMode && (
|
||||||
|
<span className="text-xs px-2.5 py-1 rounded-full font-medium bg-nvidia-green/10 text-nvidia-green border border-nvidia-green/20">
|
||||||
|
{currentParams.queryMode === 'pure-rag' ? 'Pure RAG' :
|
||||||
|
currentParams.queryMode === 'vector-search' ? 'GraphRAG' :
|
||||||
|
'Traditional Graph'}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<div className="prose prose-sm dark:prose-invert max-w-none">
|
<div className="prose prose-sm dark:prose-invert max-w-none">
|
||||||
<div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
|
{(() => {
|
||||||
<p className="text-foreground leading-relaxed whitespace-pre-wrap">{llmAnswer}</p>
|
// Parse <think> tags
|
||||||
|
const thinkMatch = llmAnswer.match(/<think>([\s\S]*?)<\/think>/);
|
||||||
|
const thinkContent = thinkMatch ? thinkMatch[1].trim() : null;
|
||||||
|
const mainAnswer = thinkContent
|
||||||
|
? llmAnswer.replace(/<think>[\s\S]*?<\/think>/, '').trim()
|
||||||
|
: llmAnswer;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<>
|
||||||
|
{thinkContent && (
|
||||||
|
<details className="mb-4 bg-muted/10 border border-border/20 rounded-xl overflow-hidden">
|
||||||
|
<summary className="cursor-pointer p-4 hover:bg-muted/20 transition-colors flex items-center gap-2">
|
||||||
|
<svg className="w-4 h-4 transform transition-transform" style={{ transform: 'rotate(0deg)' }} xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" />
|
||||||
|
</svg>
|
||||||
|
<span className="text-sm font-medium text-muted-foreground">Reasoning Process</span>
|
||||||
|
</summary>
|
||||||
|
<div className="p-4 pt-0 text-sm text-muted-foreground leading-relaxed whitespace-pre-wrap border-t border-border/10">
|
||||||
|
{thinkContent}
|
||||||
</div>
|
</div>
|
||||||
|
</details>
|
||||||
|
)}
|
||||||
|
<div className="bg-muted/20 border border-border/20 p-6 rounded-xl">
|
||||||
|
<div
|
||||||
|
className="text-foreground leading-relaxed whitespace-pre-wrap"
|
||||||
|
dangerouslySetInnerHTML={{
|
||||||
|
__html: mainAnswer
|
||||||
|
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
|
||||||
|
.replace(/\*(.*?)\*/g, '<em>$1</em>')
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</>
|
||||||
|
);
|
||||||
|
})()}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Results Section */}
|
{/* Results Section */}
|
||||||
{results && results.length > 0 && (
|
{results && results.length > 0 && !currentParams.usePureRag && (
|
||||||
<div className="mt-8 nvidia-build-card">
|
<div className="mt-8 nvidia-build-card">
|
||||||
<div className="flex items-center gap-3 mb-6">
|
<div className="flex items-center gap-3 mb-6">
|
||||||
<div className="w-6 h-6 rounded-md bg-nvidia-green/15 flex items-center justify-center">
|
<div className="w-6 h-6 rounded-md bg-nvidia-green/15 flex items-center justify-center">
|
||||||
<SearchIcon className="h-3 w-3 text-nvidia-green" />
|
<SearchIcon className="h-3 w-3 text-nvidia-green" />
|
||||||
</div>
|
</div>
|
||||||
<h3 className="text-lg font-semibold text-foreground">
|
<h3 className="text-lg font-semibold text-foreground">
|
||||||
{llmAnswer ? `Supporting Triples (${results.length})` : `Results (${results.length})`}
|
{llmAnswer ? `Retrieved Knowledge (${results.length})` : `Results (${results.length})`}
|
||||||
</h3>
|
</h3>
|
||||||
</div>
|
</div>
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
@ -464,7 +519,7 @@ export default function RagPage() {
|
|||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{results && results.length === 0 && !isLoading && (
|
{results && results.length === 0 && !isLoading && !currentParams.usePureRag && (
|
||||||
<div className="mt-8 nvidia-build-card border-dashed">
|
<div className="mt-8 nvidia-build-card border-dashed">
|
||||||
<div className="text-center py-8">
|
<div className="text-center py-8">
|
||||||
<div className="w-12 h-12 rounded-xl bg-muted/30 flex items-center justify-center mx-auto mb-4">
|
<div className="w-12 h-12 rounded-xl bg-muted/30 flex items-center justify-center mx-auto mb-4">
|
||||||
|
|||||||
@ -145,7 +145,7 @@ export function RagQuery({
|
|||||||
</div>
|
</div>
|
||||||
<span className="text-sm font-semibold">Pure RAG</span>
|
<span className="text-sm font-semibold">Pure RAG</span>
|
||||||
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
||||||
Pinecone + LangChain
|
Vector DB + LLM
|
||||||
</span>
|
</span>
|
||||||
{queryMode === 'pure-rag' && (
|
{queryMode === 'pure-rag' && (
|
||||||
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
||||||
@ -166,7 +166,7 @@ export function RagQuery({
|
|||||||
</div>
|
</div>
|
||||||
<span className="text-sm font-semibold">Traditional Graph</span>
|
<span className="text-sm font-semibold">Traditional Graph</span>
|
||||||
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
||||||
Graph database only
|
Graph DB + LLM
|
||||||
</span>
|
</span>
|
||||||
{queryMode === 'traditional' && (
|
{queryMode === 'traditional' && (
|
||||||
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
||||||
@ -176,28 +176,20 @@ export function RagQuery({
|
|||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
onClick={() => handleQueryModeChange('vector-search')}
|
onClick={() => handleQueryModeChange('vector-search')}
|
||||||
disabled={!vectorEnabled}
|
disabled={true}
|
||||||
className={`relative flex flex-col items-center p-3 border rounded-lg transition-all duration-200 hover:shadow-md ${
|
title="GraphRAG requires a GNN model (not yet available)"
|
||||||
queryMode === 'vector-search'
|
className="relative flex flex-col items-center p-3 border rounded-lg border-border/30 opacity-50 cursor-not-allowed"
|
||||||
? 'border-nvidia-green bg-nvidia-green/10 text-nvidia-green shadow-sm'
|
|
||||||
: vectorEnabled
|
|
||||||
? 'border-border/40 hover:border-border/60 hover:bg-muted/20'
|
|
||||||
: 'border-border/30 opacity-50 cursor-not-allowed'
|
|
||||||
}`}
|
|
||||||
>
|
>
|
||||||
<div className="w-5 h-5 rounded-md bg-nvidia-green/15 flex items-center justify-center mb-1.5">
|
<div className="w-5 h-5 rounded-md bg-muted/15 flex items-center justify-center mb-1.5">
|
||||||
<Cpu className="h-2.5 w-2.5 text-nvidia-green" />
|
<Cpu className="h-2.5 w-2.5 text-muted-foreground" />
|
||||||
</div>
|
</div>
|
||||||
<span className="text-sm font-semibold">GraphRAG</span>
|
<span className="text-sm font-semibold text-muted-foreground">GraphRAG</span>
|
||||||
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
<span className="text-[10px] mt-0.5 text-center text-muted-foreground leading-tight">
|
||||||
RAG + GNN
|
RAG + GNN
|
||||||
</span>
|
</span>
|
||||||
<div className="text-[9px] px-1.5 py-0.5 bg-nvidia-green/20 text-nvidia-green rounded mt-1 font-medium">
|
<div className="text-[9px] px-1.5 py-0.5 bg-amber-500/20 text-amber-700 dark:text-amber-500 rounded mt-1 font-medium">
|
||||||
NEW
|
COMING SOON
|
||||||
</div>
|
</div>
|
||||||
{queryMode === 'vector-search' && (
|
|
||||||
<div className="absolute top-2 right-2 w-1.5 h-1.5 bg-nvidia-green rounded-full"></div>
|
|
||||||
)}
|
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain
|
* Retrieval Augmented Generation (RAG) implementation using Qdrant and LangChain
|
||||||
* This module provides a RetrievalQA chain using Qdrant as the vector store
|
* This module provides a RetrievalQA chain using Qdrant as the vector store
|
||||||
* Note: xAI integration has been removed - needs alternative LLM provider implementation
|
* Uses NVIDIA API for LLM inference
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { ChatOpenAI } from "@langchain/openai";
|
import { ChatOpenAI } from "@langchain/openai";
|
||||||
@ -9,27 +9,42 @@ import { Document } from "@langchain/core/documents";
|
|||||||
import { RunnableSequence } from "@langchain/core/runnables";
|
import { RunnableSequence } from "@langchain/core/runnables";
|
||||||
import { StringOutputParser } from "@langchain/core/output_parsers";
|
import { StringOutputParser } from "@langchain/core/output_parsers";
|
||||||
import { PromptTemplate } from "@langchain/core/prompts";
|
import { PromptTemplate } from "@langchain/core/prompts";
|
||||||
import { QdrantService, DocumentSearchResult } from './qdrant';
|
import { QdrantVectorStore } from "@langchain/community/vectorstores/qdrant";
|
||||||
|
import { Embeddings } from "@langchain/core/embeddings";
|
||||||
import { EmbeddingsService } from './embeddings';
|
import { EmbeddingsService } from './embeddings';
|
||||||
|
|
||||||
// Interface for records to store in Qdrant
|
// Custom embeddings adapter to use our EmbeddingsService with LangChain
|
||||||
interface QdrantRecord {
|
class CustomEmbeddings extends Embeddings {
|
||||||
id: string;
|
private embeddingsService: EmbeddingsService;
|
||||||
values: number[];
|
|
||||||
metadata?: Record<string, any>;
|
constructor() {
|
||||||
|
super({});
|
||||||
|
this.embeddingsService = EmbeddingsService.getInstance();
|
||||||
|
}
|
||||||
|
|
||||||
|
async embedDocuments(documents: string[]): Promise<number[][]> {
|
||||||
|
await this.embeddingsService.initialize();
|
||||||
|
return await this.embeddingsService.encode(documents);
|
||||||
|
}
|
||||||
|
|
||||||
|
async embedQuery(query: string): Promise<number[]> {
|
||||||
|
await this.embeddingsService.initialize();
|
||||||
|
const embeddings = await this.embeddingsService.encode([query]);
|
||||||
|
return embeddings[0];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export class RAGService {
|
export class RAGService {
|
||||||
private static instance: RAGService;
|
private static instance: RAGService;
|
||||||
private pineconeService: QdrantService;
|
private vectorStore: QdrantVectorStore | null = null;
|
||||||
private embeddingsService: EmbeddingsService;
|
private embeddingsService: CustomEmbeddings;
|
||||||
private llm: ChatOpenAI | null = null;
|
private llm: ChatOpenAI | null = null;
|
||||||
private initialized: boolean = false;
|
private initialized: boolean = false;
|
||||||
private isInitializing: boolean = false;
|
private isInitializing: boolean = false;
|
||||||
|
private collectionName: string = 'entity-embeddings';
|
||||||
|
|
||||||
private constructor() {
|
private constructor() {
|
||||||
this.pineconeService = QdrantService.getInstance();
|
this.embeddingsService = new CustomEmbeddings();
|
||||||
this.embeddingsService = EmbeddingsService.getInstance();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -55,24 +70,52 @@ export class RAGService {
|
|||||||
try {
|
try {
|
||||||
console.log('Initializing RAG service...');
|
console.log('Initializing RAG service...');
|
||||||
|
|
||||||
// Initialize dependencies
|
// Check for NVIDIA API key
|
||||||
await this.pineconeService.initialize();
|
|
||||||
await this.embeddingsService.initialize();
|
|
||||||
|
|
||||||
// Initialize LLM - Try NVIDIA first, then fall back to error
|
|
||||||
const nvidiaApiKey = process.env.NVIDIA_API_KEY;
|
const nvidiaApiKey = process.env.NVIDIA_API_KEY;
|
||||||
if (!nvidiaApiKey) {
|
if (!nvidiaApiKey) {
|
||||||
throw new Error('RAG service requires NVIDIA_API_KEY to be set in environment variables. xAI integration has been removed.');
|
const error = new Error('NVIDIA_API_KEY is required for RAG service. Please set the NVIDIA_API_KEY environment variable.');
|
||||||
|
console.error('❌ RAG Initialization Error:', error.message);
|
||||||
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: This is a placeholder - NVIDIA LLM integration would need to be implemented
|
// Initialize NVIDIA LLM using ChatOpenAI with NVIDIA's base URL
|
||||||
// For now, we'll throw an error to indicate RAG service is not available
|
this.llm = new ChatOpenAI({
|
||||||
throw new Error('RAG service is temporarily unavailable after xAI removal. Please implement alternative LLM provider.');
|
modelName: "nvidia/llama-3.3-nemotron-super-49b-v1.5",
|
||||||
|
temperature: 0.2,
|
||||||
|
maxTokens: 1024,
|
||||||
|
openAIApiKey: nvidiaApiKey,
|
||||||
|
configuration: {
|
||||||
|
baseURL: "https://integrate.api.nvidia.com/v1",
|
||||||
|
timeout: 120000, // 120 second timeout
|
||||||
|
},
|
||||||
|
modelKwargs: {
|
||||||
|
top_p: 0.95,
|
||||||
|
frequency_penalty: 0,
|
||||||
|
presence_penalty: 0
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log('✅ NVIDIA LLM initialized successfully');
|
||||||
|
|
||||||
|
// Initialize Qdrant vector store
|
||||||
|
const qdrantUrl = process.env.QDRANT_URL || 'http://localhost:6333';
|
||||||
|
|
||||||
|
this.vectorStore = await QdrantVectorStore.fromExistingCollection(
|
||||||
|
this.embeddingsService,
|
||||||
|
{
|
||||||
|
url: qdrantUrl,
|
||||||
|
collectionName: this.collectionName,
|
||||||
|
contentPayloadKey: 'text', // Map payload.text to pageContent
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log('✅ Qdrant vector store connected successfully');
|
||||||
|
|
||||||
this.initialized = true;
|
this.initialized = true;
|
||||||
console.log('RAG service initialized successfully');
|
console.log('✅ RAG service initialized successfully');
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error initializing RAG service:', error);
|
console.error('❌ Error initializing RAG service:', error);
|
||||||
|
this.isInitializing = false;
|
||||||
throw error;
|
throw error;
|
||||||
} finally {
|
} finally {
|
||||||
this.isInitializing = false;
|
this.isInitializing = false;
|
||||||
@ -80,7 +123,7 @@ export class RAGService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Store documents in Pinecone for retrieval
|
* Store documents in Qdrant for retrieval
|
||||||
* @param documents Array of text documents to store
|
* @param documents Array of text documents to store
|
||||||
* @param metadata Optional metadata for the documents
|
* @param metadata Optional metadata for the documents
|
||||||
*/
|
*/
|
||||||
@ -97,29 +140,28 @@ export class RAGService {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Storing ${documents.length} documents in Pinecone`);
|
if (!this.vectorStore) {
|
||||||
|
throw new Error('Vector store not initialized');
|
||||||
|
}
|
||||||
|
|
||||||
// Generate embeddings for documents
|
console.log(`Storing ${documents.length} documents in Qdrant`);
|
||||||
const embeddings = await this.embeddingsService.encode(documents);
|
|
||||||
|
|
||||||
// Prepare records for Pinecone
|
// Create Document objects with metadata
|
||||||
const records: PineconeRecord[] = embeddings.map((embedding, i) => ({
|
const docs = documents.map((text, i) => new Document({
|
||||||
id: `doc_${Date.now()}_${i}`,
|
pageContent: text,
|
||||||
values: embedding,
|
|
||||||
metadata: {
|
metadata: {
|
||||||
text: documents[i],
|
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
...(metadata && metadata[i] ? metadata[i] : {})
|
...(metadata && metadata[i] ? metadata[i] : {})
|
||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Store in Pinecone
|
// Store in Qdrant using LangChain
|
||||||
await this.pineconeService.upsertVectors(records);
|
await this.vectorStore.addDocuments(docs);
|
||||||
console.log(`Successfully stored ${records.length} document embeddings`);
|
console.log(`✅ Successfully stored ${docs.length} document embeddings`);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform question answering with document retrieval
|
* Perform question answering with document retrieval using proper RAG implementation
|
||||||
* @param query User query
|
* @param query User query
|
||||||
* @param topK Number of most similar documents to retrieve
|
* @param topK Number of most similar documents to retrieve
|
||||||
* @returns Answer generated from relevant context
|
* @returns Answer generated from relevant context
|
||||||
@ -133,14 +175,17 @@ export class RAGService {
|
|||||||
throw new Error('LLM not initialized');
|
throw new Error('LLM not initialized');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate embedding for query
|
if (!this.vectorStore) {
|
||||||
const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
|
throw new Error('Vector store not initialized');
|
||||||
|
}
|
||||||
|
|
||||||
// Retrieve similar documents from Pinecone
|
console.log(`🔍 Performing RAG query with topK=${topK}`);
|
||||||
const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
|
|
||||||
|
// Use LangChain's similarity search to retrieve relevant documents
|
||||||
|
const similarDocs = await this.vectorStore.similaritySearch(query, topK);
|
||||||
|
|
||||||
if (!similarDocs || similarDocs.length === 0) {
|
if (!similarDocs || similarDocs.length === 0) {
|
||||||
console.log('No relevant documents found, falling back to LLM');
|
console.log('⚠️ No relevant documents found, falling back to LLM general knowledge');
|
||||||
|
|
||||||
// Define prompt template for standalone LLM response
|
// Define prompt template for standalone LLM response
|
||||||
const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
|
const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
|
||||||
@ -167,15 +212,64 @@ Answer:
|
|||||||
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
|
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`✅ Found ${similarDocs.length} relevant documents`);
|
||||||
|
|
||||||
|
// Log first document structure for debugging
|
||||||
|
if (similarDocs.length > 0) {
|
||||||
|
console.log('📄 First document structure:', {
|
||||||
|
hasPageContent: !!similarDocs[0].pageContent,
|
||||||
|
pageContentLength: similarDocs[0].pageContent?.length || 0,
|
||||||
|
hasMetadata: !!similarDocs[0].metadata,
|
||||||
|
metadataKeys: similarDocs[0].metadata ? Object.keys(similarDocs[0].metadata) : []
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Extract text from retrieved documents
|
// Extract text from retrieved documents
|
||||||
|
// Support both pageContent (LangChain standard) and metadata.text (legacy format)
|
||||||
const context = similarDocs
|
const context = similarDocs
|
||||||
.map((doc: DocumentSearchResult) => doc.metadata?.text || '')
|
.map((doc) => {
|
||||||
.filter((text: string) => text.length > 0)
|
// Try pageContent first (LangChain standard)
|
||||||
|
if (doc.pageContent && doc.pageContent.trim().length > 0) {
|
||||||
|
return doc.pageContent;
|
||||||
|
}
|
||||||
|
// Fall back to metadata.text (legacy Qdrant storage format)
|
||||||
|
if (doc.metadata?.text && doc.metadata.text.trim().length > 0) {
|
||||||
|
return doc.metadata.text;
|
||||||
|
}
|
||||||
|
return '';
|
||||||
|
})
|
||||||
|
.filter((text) => text.length > 0)
|
||||||
.join('\n\n');
|
.join('\n\n');
|
||||||
|
|
||||||
// Define prompt template for QA
|
console.log(`📝 Extracted context length: ${context.length} characters`);
|
||||||
|
|
||||||
|
if (!context || context.trim().length === 0) {
|
||||||
|
console.log('⚠️ Retrieved documents have no content, falling back to LLM');
|
||||||
|
const fallbackPromptTemplate = PromptTemplate.fromTemplate(`
|
||||||
|
You are a helpful assistant answering questions based on your general knowledge.
|
||||||
|
|
||||||
|
Question: {query}
|
||||||
|
|
||||||
|
Answer:
|
||||||
|
`);
|
||||||
|
|
||||||
|
const fallbackChain = RunnableSequence.from([
|
||||||
|
{
|
||||||
|
query: () => query,
|
||||||
|
},
|
||||||
|
fallbackPromptTemplate,
|
||||||
|
this.llm,
|
||||||
|
new StringOutputParser(),
|
||||||
|
]);
|
||||||
|
|
||||||
|
const answer = await fallbackChain.invoke({});
|
||||||
|
return `[Note: No specific information was found in the knowledge base. This answer is based on general knowledge.]\n\n${answer}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define prompt template for RAG
|
||||||
const promptTemplate = PromptTemplate.fromTemplate(`
|
const promptTemplate = PromptTemplate.fromTemplate(`
|
||||||
Answer the question based only on the following context:
|
Answer the question based only on the following context from the knowledge base.
|
||||||
|
If you cannot find the answer in the context, say "I cannot find this information in the knowledge base."
|
||||||
|
|
||||||
Context:
|
Context:
|
||||||
{context}
|
{context}
|
||||||
@ -185,7 +279,7 @@ Question: {query}
|
|||||||
Answer:
|
Answer:
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Create retrieval chain
|
// Create retrieval chain using RunnableSequence
|
||||||
const retrievalChain = RunnableSequence.from([
|
const retrievalChain = RunnableSequence.from([
|
||||||
{
|
{
|
||||||
context: () => context,
|
context: () => context,
|
||||||
@ -196,9 +290,18 @@ Answer:
|
|||||||
new StringOutputParser(),
|
new StringOutputParser(),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
console.log('🤖 Generating answer with NVIDIA LLM...');
|
||||||
|
|
||||||
// Execute chain
|
// Execute chain
|
||||||
|
try {
|
||||||
const answer = await retrievalChain.invoke({});
|
const answer = await retrievalChain.invoke({});
|
||||||
|
console.log('✅ RAG query completed successfully');
|
||||||
|
console.log(`📝 Answer length: ${answer.length} characters`);
|
||||||
return answer;
|
return answer;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('❌ Error generating answer with NVIDIA LLM:', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -215,15 +318,16 @@ Answer:
|
|||||||
await this.initialize();
|
await this.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate embedding for query
|
if (!this.vectorStore) {
|
||||||
const queryEmbedding = (await this.embeddingsService.encode([query]))[0];
|
throw new Error('Vector store not initialized');
|
||||||
|
}
|
||||||
|
|
||||||
// Retrieve similar documents from Pinecone
|
// Use LangChain's similarity search with scores
|
||||||
const similarDocs = await this.pineconeService.findSimilarDocuments(queryEmbedding, topK);
|
const results = await this.vectorStore.similaritySearchWithScore(query, topK);
|
||||||
|
|
||||||
return similarDocs.map((doc: DocumentSearchResult) => ({
|
return results.map(([doc, score]) => ({
|
||||||
text: doc.metadata?.text || '',
|
text: doc.pageContent,
|
||||||
score: doc.score,
|
score: score,
|
||||||
metadata: doc.metadata
|
metadata: doc.metadata
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,6 +19,7 @@
|
|||||||
"@langchain/community": "^0.3.40",
|
"@langchain/community": "^0.3.40",
|
||||||
"@langchain/core": "^0.3.43",
|
"@langchain/core": "^0.3.43",
|
||||||
"@langchain/openai": "^0.5.2",
|
"@langchain/openai": "^0.5.2",
|
||||||
|
"@qdrant/js-client-rest": "^1.11.0",
|
||||||
"@radix-ui/react-alert-dialog": "^1.1.4",
|
"@radix-ui/react-alert-dialog": "^1.1.4",
|
||||||
"@radix-ui/react-avatar": "^1.1.2",
|
"@radix-ui/react-avatar": "^1.1.2",
|
||||||
"@radix-ui/react-checkbox": "^1.1.3",
|
"@radix-ui/react-checkbox": "^1.1.3",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user