//
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
import axios from 'axios';
import { GraphDBService, GraphDBType } from './graph-db-service';
import { QdrantService } from './qdrant';
import { getGraphDbService } from './graph-db-util';
import type { Triple } from '@/types/graph';

/**
 * Backend service that combines a graph database for storage and Qdrant for embeddings.
 *
 * Two distinct modes:
 * 1. Knowledge Graph Mode: Stores triples in graph DB + entity names in 'entity-embeddings' collection
 * 2. Pure RAG Mode: Stores document chunks in 'document-embeddings' collection (via RAGService)
 *
 * Use processTriples() for knowledge graph ingestion
 * Use storeDocumentChunks() for Pure RAG document ingestion
 *
 * The class is a singleton; obtain it through {@link BackendService.getInstance}
 * or the module's default export.
 */
export class BackendService {
  /** Common words dropped from keyword queries. Built once instead of on every lookup. */
  private static readonly STOP_WORDS: ReadonlySet<string> = new Set([
    'the', 'and', 'are', 'for', 'was', 'with',
    'how', 'what', 'why', 'who', 'when', 'which',
    'many', 'much', 'from', 'have', 'has', 'had',
    'that', 'this', 'these', 'those', 'they', 'their'
  ]);

  /** Tag name used to wrap model reasoning so the UI can separate it from the answer. */
  private static readonly REASONING_TAG = 'think';

  private static instance: BackendService;

  private graphDBService: GraphDBService;
  private qdrantService: QdrantService;
  private sentenceTransformerUrl: string = 'http://sentence-transformers:80';
  private modelName: string = 'all-MiniLM-L6-v2';
  private initialized: boolean = false;
  private activeGraphDbType: GraphDBType = 'arangodb';

  private constructor() {
    this.graphDBService = GraphDBService.getInstance();
    this.qdrantService = QdrantService.getInstance();

    // Use environment variables if available
    if (process.env.SENTENCE_TRANSFORMER_URL) {
      this.sentenceTransformerUrl = process.env.SENTENCE_TRANSFORMER_URL;
    }
    if (process.env.MODEL_NAME) {
      this.modelName = process.env.MODEL_NAME;
    }
  }

  /**
   * Get the singleton instance of BackendService
   */
  public static getInstance(): BackendService {
    if (!BackendService.instance) {
      BackendService.instance = new BackendService();
    }
    return BackendService.instance;
  }

  /**
   * Initialize the backend services: the graph database, the Qdrant vector
   * store, and the connection check against the sentence-transformer service.
   *
   * @param graphDbType - Type of graph database to use (neo4j or arangodb)
   * @throws Error when the graph database or the sentence-transformer service
   *   cannot be reached, unless NODE_ENV is 'development' (then it only logs).
   */
  public async initialize(graphDbType: GraphDBType = 'arangodb'): Promise<void> {
    this.activeGraphDbType = graphDbType;

    // Initialize Graph Database
    if (!this.graphDBService.isInitialized()) {
      try {
        // NOTE(review): the returned service is not used here. The call is kept in
        // case it has side effects (e.g. validating the type) — confirm and drop if pure.
        getGraphDbService(graphDbType);

        // Try to get settings from server settings API first
        let serverSettings: Record<string, string | undefined> = {};
        try {
          const response = await fetch('/api/settings');
          if (response.ok) {
            const data = await response.json();
            serverSettings = data.settings || {};
            console.log('Successfully loaded settings from server API');
          }
        } catch (error) {
          console.log('Failed to load settings from server API, falling back to environment variables:', error);
        }

        if (graphDbType === 'neo4j') {
          // Get Neo4j credentials from server settings first, then fallback to environment
          const uri = serverSettings.neo4j_url || process.env.NEO4J_URI;
          const username = serverSettings.neo4j_user || process.env.NEO4J_USER || process.env.NEO4J_USERNAME;
          const password = serverSettings.neo4j_password || process.env.NEO4J_PASSWORD;
          console.log(`Using Neo4j URI: ${uri}`);
          await this.graphDBService.initialize('neo4j', uri, username, password);
        } else {
          // Prioritize environment variables over server settings for Docker deployments
          const url = process.env.ARANGODB_URL || serverSettings.arango_url || 'http://localhost:8529';
          const dbName = process.env.ARANGODB_DB || serverSettings.arango_db || 'txt2kg';
          const username = process.env.ARANGODB_USER || serverSettings.arango_user;
          const password = process.env.ARANGODB_PASSWORD || serverSettings.arango_password;
          console.log(`Using ArangoDB URL: ${url}`);
          console.log(`Using ArangoDB database: ${dbName}`);
          // NOTE(review): dbName is resolved and logged but not forwarded to initialize();
          // verify against GraphDBService.initialize whether it should be passed.
          await this.graphDBService.initialize('arangodb', url, username, password);
        }
        console.log(`${graphDbType} initialized successfully in backend service`);
      } catch (error) {
        console.error(`Failed to initialize ${graphDbType} in backend service:`, error);
        if (process.env.NODE_ENV === 'development') {
          console.log('Development mode: Continuing despite graph database initialization error');
        } else {
          throw new Error('Graph database service initialization failed');
        }
      }
    }

    // Initialize the Qdrant vector store
    if (!this.qdrantService.isInitialized()) {
      await this.qdrantService.initialize();
    }

    // Check if sentence-transformer service is available
    try {
      const response = await axios.get(`${this.sentenceTransformerUrl}/health`);
      console.log(`Connected to SentenceTransformer service: ${response.data.model}`);
      this.initialized = true;
    } catch (error) {
      console.error(`Failed to connect to sentence-transformer service: ${error}`);
      if (process.env.NODE_ENV === 'development') {
        console.log('Development mode: Continuing despite sentence transformer error');
        this.initialized = true;
      } else {
        throw new Error('Sentence transformer service is not available');
      }
    }
  }

  /**
   * Check if the backend is initialized (both this service and the graph database).
   */
  public get isInitialized(): boolean {
    return this.initialized && this.graphDBService.isInitialized();
  }

  /**
   * Get the active graph database type
   */
  public getGraphDbType(): GraphDBType {
    return this.activeGraphDbType;
  }

  /**
   * Generate embeddings using the sentence-transformer service.
   *
   * @param texts - Texts to embed; one vector is expected back per text.
   * @returns One embedding vector per input text, in input order.
   * @throws Error('Failed to generate embeddings') on transport errors or when
   *   the service does not return exactly one vector per text.
   */
  private async generateEmbeddings(texts: string[]): Promise<number[][]> {
    try {
      const response = await axios.post(`${this.sentenceTransformerUrl}/embed`, {
        texts,
        batch_size: 32
      });
      const embeddings = response.data.embeddings;
      // Guard against a malformed reply so callers never store `undefined` vectors.
      if (!Array.isArray(embeddings) || embeddings.length !== texts.length) {
        throw new Error(`Expected ${texts.length} embeddings from sentence-transformer service`);
      }
      return embeddings;
    } catch (error) {
      console.error(`Error generating embeddings: ${error}`);
      throw new Error('Failed to generate embeddings');
    }
  }

  /**
   * Convert our triple format to database format (subject/predicate/object only).
   */
  private convertTriples(triples: Triple[]): { subject: string; predicate: string; object: string }[] {
    return triples.map(triple => ({
      subject: triple.subject,
      predicate: triple.predicate,
      object: triple.object
    }));
  }

  /**
   * Process and store triples in the graph database and entity embeddings in Qdrant.
   *
   * Triples are lowercased and de-duplicated before storage; every distinct
   * subject/object is embedded and stored with its name as text content.
   *
   * @param triples - Triples to ingest.
   */
  public async processTriples(triples: Triple[]): Promise<void> {
    // Preprocess triples: lowercase and remove duplicates
    const processedTriples = triples.map(triple => ({
      subject: triple.subject.toLowerCase(),
      predicate: triple.predicate.toLowerCase(),
      object: triple.object.toLowerCase()
    }));

    // Remove duplicate triples (keyed by their JSON serialization)
    const uniqueTriples = Array.from(
      new Map(processedTriples.map(triple => [JSON.stringify(triple), triple])).values()
    );
    console.log(`Processed ${triples.length} triples, removed ${triples.length - uniqueTriples.length} duplicates`);

    // Store triples in graph database
    console.log(`Storing triples in ${this.activeGraphDbType} database`);
    await this.graphDBService.importTriples(this.convertTriples(uniqueTriples));

    // Extract unique entities from triples
    const entities = new Set<string>();
    for (const triple of uniqueTriples) {
      entities.add(triple.subject);
      entities.add(triple.object);
    }

    // Generate embeddings for entities in batches
    const entityList = Array.from(entities);
    const batchSize = 256;
    const entityEmbeddings = new Map<string, number[]>();
    const textContent = new Map<string, string>();

    console.log(`Generating embeddings for ${entityList.length} entities`);
    for (let i = 0; i < entityList.length; i += batchSize) {
      const batch = entityList.slice(i, i + batchSize);
      console.log(`Processing batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(entityList.length / batchSize)}`);
      const embeddings = await this.generateEmbeddings(batch);

      batch.forEach((entity, j) => {
        entityEmbeddings.set(entity, embeddings[j]);
        textContent.set(entity, entity); // Store the entity name as text content
      });
    }

    // Store embeddings and text content in Qdrant
    await this.qdrantService.storeEmbeddings(entityEmbeddings, textContent);
    console.log(`Backend processing complete: ${uniqueTriples.length} triples and ${entityList.length} entities stored using ${this.activeGraphDbType}`);
  }

  /**
   * Perform a traditional query using direct pattern matching on the graph.
   * This bypasses the vector embeddings and uses text matching.
   *
   * With ArangoDB the native graph traversal is tried first; on failure, or for
   * other databases, every relationship touching a keyword-matching node is
   * scored and returned sorted by confidence (highest first).
   *
   * @param queryText - Free-text query.
   * @returns Matching triples.
   */
  public async queryTraditional(queryText: string): Promise<Triple[]> {
    console.log(`Performing traditional graph query: "${queryText}"`);

    // Extract keywords from query
    const keywords = this.extractKeywords(queryText);
    console.log(`Extracted keywords: ${keywords.join(', ')}`);

    // Filter out stop words
    const filteredKeywords = keywords.filter(kw => !this.isStopWord(kw));

    // If using ArangoDB, use its native graph traversal capabilities
    if (this.activeGraphDbType === 'arangodb') {
      console.log(`Using ArangoDB native graph traversal for keywords: ${filteredKeywords.join(', ')}`);
      try {
        const results = await this.graphDBService.graphTraversal(filteredKeywords, 2, 100);
        console.log(`ArangoDB graph traversal found ${results.length} relevant triples`);

        // Log top 10 results with confidence scores for debugging
        console.log('Top 10 triples by confidence:');
        results.slice(0, 10).forEach((triple, idx) => {
          // A missing confidence must not throw here: an exception inside this try
          // would discard valid traversal results and trigger the fallback below.
          const score = typeof triple.confidence === 'number' ? triple.confidence.toFixed(3) : 'n/a';
          console.log(` ${idx + 1}. [${score}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth: ${triple.depth})`);
        });

        return results;
      } catch (error) {
        console.error('Error using ArangoDB graph traversal, falling back to traditional method:', error);
        // Fall through to traditional method if ArangoDB traversal fails
      }
    }

    // Fallback to traditional keyword matching for Neo4j or if ArangoDB traversal fails
    console.log(`Using fallback keyword-based search`);

    // Get graph data from graph database
    const graphData = await this.graphDBService.getGraphData();
    console.log(`Retrieved graph from ${this.activeGraphDbType} with ${graphData.nodes.length} nodes and ${graphData.relationships.length} relationships`);

    // Map of node IDs to names
    const nodeIdToName = new Map(graphData.nodes.map(node => [node.id, node.name] as const));

    // Case-insensitive substring matching; lowercase the keywords once up front
    const loweredKeywords = filteredKeywords.map(kw => kw.toLowerCase());
    const matchingNodeIds = new Set(
      graphData.nodes
        .filter(node => {
          const name = node.name.toLowerCase();
          return loweredKeywords.some(kw => name.includes(kw));
        })
        .map(node => node.id)
    );
    console.log(`Found ${matchingNodeIds.size} nodes matching keywords directly`);

    // Find relationships where either subject or object matches
    const relevantTriples: Triple[] = [];
    for (const rel of graphData.relationships) {
      const isSourceMatching = matchingNodeIds.has(rel.source);
      const isTargetMatching = matchingNodeIds.has(rel.target);
      if (!isSourceMatching && !isTargetMatching) {
        continue;
      }

      const sourceName = nodeIdToName.get(rel.source);
      const targetName = nodeIdToName.get(rel.target);
      if (!sourceName || !targetName) {
        continue;
      }

      // Check if the relationship type matches keywords
      const relType = rel.type.toLowerCase();
      const matchesRelationship = loweredKeywords.some(kw => relType.includes(kw));

      // Higher relevance to relationships that match the query directly.
      // At least one endpoint matched, so relevance is always in 1..4.
      const relevance =
        (isSourceMatching ? 1 : 0) +
        (isTargetMatching ? 1 : 0) +
        (matchesRelationship ? 2 : 0);

      relevantTriples.push({
        subject: sourceName,
        predicate: rel.type,
        object: targetName,
        confidence: relevance / 4.0 // Scale to (0, 1]
      });
    }

    // Sort by confidence (highest first)
    relevantTriples.sort((a, b) => (b.confidence || 0) - (a.confidence || 0));

    // Return all relevant triples, sorted by relevance
    console.log(`Found ${relevantTriples.length} relevant triples with traditional search`);
    return relevantTriples;
  }

  /**
   * Extract keywords from query text: lowercase, strip basic punctuation, split
   * on whitespace and drop words of two characters or fewer.
   */
  private extractKeywords(text: string): string[] {
    return text.toLowerCase()
      .replace(/[.,?!;:()]/g, ' ') // Remove punctuation
      .split(/\s+/) // Split by whitespace
      .filter(word => word.length > 2); // Filter out very short words
  }

  /**
   * Check if a word is a common stop word (case-insensitive).
   */
  private isStopWord(word: string): boolean {
    return BackendService.STOP_WORDS.has(word.toLowerCase());
  }

  /**
   * Query the backend for relevant information.
   *
   * Vector mode embeds the query, finds seed entities in Qdrant, expands them
   * over outgoing edges for `numHops` hops and returns up to 25 triples touching
   * any visited node.
   *
   * @param queryText - Free-text query.
   * @param kNeighbors - Number of nearest entities to use as seed nodes.
   * @param fanout - Maximum neighbors followed per node per hop.
   * @param numHops - Number of expansion hops from the seed nodes.
   * @param useTraditional - When true, delegate to queryTraditional().
   * @returns Relevant triples.
   */
  public async query(
    queryText: string,
    kNeighbors: number = 4096,
    fanout: number = 400,
    numHops: number = 2,
    useTraditional: boolean = false
  ): Promise<Triple[]> {
    console.log(`Querying backend with database type: ${this.activeGraphDbType}, useTraditional: ${useTraditional}`);

    // If using traditional search, bypass the vector embeddings
    if (useTraditional) {
      return this.queryTraditional(queryText);
    }

    // Generate embedding for query
    const queryEmbedding = (await this.generateEmbeddings([queryText]))[0];

    // Find nearest neighbors using Qdrant
    const seedNodes = await this.qdrantService.findSimilarEntities(queryEmbedding, kNeighbors);
    console.log(`Found ${seedNodes.length} seed nodes for query: "${queryText}"`);

    // Get graph data from graph database
    const graphData = await this.graphDBService.getGraphData();
    console.log(`Retrieved graph from ${this.activeGraphDbType} with ${graphData.nodes.length} nodes and ${graphData.relationships.length} relationships`);

    // Map database node IDs to entity names
    const nodeIdToName = new Map(graphData.nodes.map(node => [node.id, node.name] as const));

    // Build adjacency map (entity name -> outgoing neighbor names) for neighborhood exploration
    const adjacencyMap = new Map<string, string[]>();
    for (const node of graphData.nodes) {
      adjacencyMap.set(node.name, []);
    }
    for (const rel of graphData.relationships) {
      const sourceName = nodeIdToName.get(rel.source);
      const targetName = nodeIdToName.get(rel.target);
      if (sourceName && targetName) {
        const neighbors = adjacencyMap.get(sourceName) || [];
        neighbors.push(targetName);
        adjacencyMap.set(sourceName, neighbors);
      }
    }

    // Perform multi-hop exploration
    const visitedNodes = new Set(seedNodes);
    const nodesToExplore = [...seedNodes];

    for (let hop = 0; hop < numHops; hop++) {
      const currentNodes = [...nodesToExplore];
      nodesToExplore.length = 0; // Clear the array; it collects the next frontier

      for (const node of currentNodes) {
        const neighbors = adjacencyMap.get(node) || [];
        for (const neighbor of neighbors.slice(0, fanout)) {
          if (!visitedNodes.has(neighbor)) {
            visitedNodes.add(neighbor);
            nodesToExplore.push(neighbor);
          }
        }
      }
      console.log(`Hop ${hop + 1}: Explored ${currentNodes.length} nodes, found ${nodesToExplore.length} new neighbors`);
    }

    // Extract relevant triples: any relationship with a visited endpoint
    const relevantTriples: Triple[] = [];
    for (const rel of graphData.relationships) {
      const sourceName = nodeIdToName.get(rel.source);
      const targetName = nodeIdToName.get(rel.target);

      if (sourceName && targetName &&
          (visitedNodes.has(sourceName) || visitedNodes.has(targetName))) {
        relevantTriples.push({
          subject: sourceName,
          predicate: rel.type,
          object: targetName
        });
      }
    }

    // Apply local filtering (simplified version of PCST algorithm)
    // Just return top N triples for simplicity
    const topK = 5; // topk parameter from the Python example
    console.log(`Found ${relevantTriples.length} relevant triples, returning top ${topK * 5}`);
    return relevantTriples.slice(0, topK * 5);
  }

  /**
   * Query with LLM enhancement: retrieve triples and use LLM to generate answer.
   * This makes traditional graph search comparable to RAG by adding LLM generation.
   *
   * If the LLM call fails, the retrieved triples are returned with a plain-text
   * summary instead of a generated answer.
   *
   * @param queryText - The user's question
   * @param topK - Number of top triples to use as context (default 5)
   * @param useTraditional - Whether to use traditional (keyword-based) or vector search
   * @param llmModel - Optional LLM model to use (defaults to environment variable)
   * @param llmProvider - Optional LLM provider (ollama, nvidia, etc.)
   * @returns Generated answer from LLM based on retrieved triples
   */
  public async queryWithLLM(
    queryText: string,
    topK: number = 5,
    useTraditional: boolean = true,
    llmModel?: string,
    llmProvider?: string
  ): Promise<{ answer: string; triples: Triple[]; count: number }> {
    console.log(`Querying with LLM enhancement: "${queryText}", topK=${topK}, traditional=${useTraditional}`);

    // Step 1: Retrieve relevant triples using graph search
    const allTriples = await this.query(queryText, 4096, 400, 2, useTraditional);

    // Step 2: Take top K triples for context
    const topTriples = allTriples.slice(0, topK);
    console.log(`Using top ${topTriples.length} triples as context for LLM`);

    // DEBUG: Log first triple to verify depth/pathLength are present
    if (topTriples.length > 0) {
      console.log('First triple structure:', JSON.stringify(topTriples[0], null, 2));
    }

    if (topTriples.length === 0) {
      return {
        answer: "I couldn't find any relevant information in the knowledge graph to answer this question.",
        triples: [],
        count: 0
      };
    }

    // Step 3: Format triples as natural language context
    const context = topTriples
      .map(triple => {
        // Convert triple to natural language
        const predicate = triple.predicate
          .replace(/_/g, ' ')
          .replace(/-/g, ' ')
          .toLowerCase();
        return `${triple.subject} ${predicate} ${triple.object}`;
      })
      .join('. ');

    // Step 4: Use LLM to generate answer from context
    try {
      // Simplified prompt to work better with NVIDIA Nemotron's natural reasoning format
      const prompt = [
        'Answer the question based on the following context from the knowledge graph.',
        '',
        'Context:',
        context,
        '',
        `Question: ${queryText}`,
        '',
        'Answer:'
      ].join('\n');

      // Determine LLM endpoint and model based on provider
      const finalProvider = llmProvider || 'ollama';
      const finalModel = llmModel || process.env.OLLAMA_MODEL || 'llama3.1:8b';
      console.log(`Using LLM: provider=${finalProvider}, model=${finalModel}`);

      let response;
      if (finalProvider === 'nvidia') {
        // Use NVIDIA API
        const nvidiaApiKey = process.env.NVIDIA_API_KEY;
        if (!nvidiaApiKey) {
          throw new Error('NVIDIA_API_KEY is required for NVIDIA provider. Please set the NVIDIA_API_KEY environment variable.');
        }

        const nvidiaUrl = 'https://integrate.api.nvidia.com/v1';

        // Non-streaming request; reasoning content is wrapped into tags manually below
        response = await axios.post(`${nvidiaUrl}/chat/completions`, {
          model: finalModel,
          messages: [
            {
              role: 'system',
              content: '/think' // Special NVIDIA API command to activate thinking mode
            },
            {
              role: 'user',
              content: prompt
            }
          ],
          temperature: 0.2,
          max_tokens: 4096,
          top_p: 0.95,
          frequency_penalty: 0,
          presence_penalty: 0,
          stream: false, // We need non-streaming to get thinking tokens
          // NVIDIA-specific thinking token parameters
          min_thinking_tokens: 1024,
          max_thinking_tokens: 2048
        }, {
          headers: {
            'Authorization': `Bearer ${nvidiaApiKey}`,
            'Content-Type': 'application/json'
          },
          timeout: 120000 // 120 second timeout
        });
      } else {
        // Use Ollama (default)
        const ollamaUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434/v1';
        response = await axios.post(`${ollamaUrl}/chat/completions`, {
          model: finalModel,
          messages: [
            {
              role: 'system',
              content: 'You are a knowledgeable research assistant specializing in biomedical and scientific literature. Provide accurate, well-structured answers based on the provided context. Maintain a professional yet accessible tone, and clearly indicate when information is limited or uncertain.'
            },
            {
              role: 'user',
              content: prompt
            }
          ],
          temperature: 0.2, // Lower for more factual, consistent responses
          max_tokens: 800 // Increased for more comprehensive answers
        });
      }

      // Extract answer and reasoning (if using NVIDIA with thinking tokens)
      const messageData = response.data.choices[0].message;
      let answer = messageData.content || '';

      // Check if NVIDIA API returned reasoning_content (thinking tokens)
      if (finalProvider === 'nvidia' && messageData.reasoning_content) {
        // Wrap the reasoning in opening/closing tags so the UI can parse it out
        const tag = BackendService.REASONING_TAG;
        answer = `<${tag}>\n${messageData.reasoning_content}\n</${tag}>\n\n${answer}`;
        console.log('Formatted response with thinking content');
      }

      // DEBUG: Log triples before returning to verify they still have depth/pathLength
      console.log('Returning triples (first one):', JSON.stringify(topTriples[0], null, 2));

      return {
        answer,
        triples: topTriples,
        count: topTriples.length
      };
    } catch (error) {
      // Log only the message: an axios error object carries the request config,
      // including the Authorization header with the API key.
      const reason = error instanceof Error ? error.message : String(error);
      console.error('Error calling LLM for answer generation:', reason);

      // Fallback: return triples without LLM enhancement
      return {
        answer: `Found ${topTriples.length} relevant triples:\n\n${context}`,
        triples: topTriples,
        count: topTriples.length
      };
    }
  }

  /**
   * Store document chunks for Pure RAG (separate from entity embeddings).
   * This stores full text chunks rather than just entity names.
   *
   * @param documents - Array of document text chunks
   * @param metadata - Optional metadata for each document
   */
  public async storeDocumentChunks(
    documents: string[],
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- metadata shape is owned by QdrantService
    metadata?: Record<string, any>[]
  ): Promise<void> {
    console.log(`Storing ${documents.length} document chunks for Pure RAG`);

    // Nothing to embed or store; avoid a pointless round trip to the embedding service
    if (documents.length === 0) {
      return;
    }

    // Generate embeddings for document chunks
    const embeddings = await this.generateEmbeddings(documents);

    // Store in Qdrant document-embeddings collection
    await this.qdrantService.storeDocumentChunks(documents, embeddings, metadata);
    console.log(`✅ Stored ${documents.length} document chunks in document-embeddings collection`);
  }

  /**
   * Close connections to backend services.
   * Waits for the graph database connection to close before reporting completion.
   */
  public async close(): Promise<void> {
    if (this.graphDBService.isInitialized()) {
      await this.graphDBService.close();
    }
    console.log('Backend service closed');
  }
}

export default BackendService.getInstance();