mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 02:23:53 +00:00
668 lines
25 KiB
TypeScript
668 lines
25 KiB
TypeScript
//
|
|
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
import axios from 'axios';
|
|
import { GraphDBService, GraphDBType } from './graph-db-service';
|
|
import { QdrantService } from './qdrant';
|
|
import { getGraphDbService } from './graph-db-util';
|
|
import type { Triple } from '@/types/graph';
|
|
|
|
/**
|
|
* Backend service that combines graph database for storage and Qdrant for embeddings
|
|
*
|
|
* Two distinct modes:
|
|
* 1. Knowledge Graph Mode: Stores triples in graph DB + entity names in 'entity-embeddings' collection
|
|
* 2. Pure RAG Mode: Stores document chunks in 'document-embeddings' collection (via RAGService)
|
|
*
|
|
* Use processTriples() for knowledge graph ingestion
|
|
* Use storeDocumentChunks() for Pure RAG document ingestion
|
|
*/
|
|
export class BackendService {
|
|
private graphDBService: GraphDBService;
|
|
private pineconeService: QdrantService;
|
|
private sentenceTransformerUrl: string = 'http://sentence-transformers:80';
|
|
private modelName: string = 'all-MiniLM-L6-v2';
|
|
private static instance: BackendService;
|
|
private initialized: boolean = false;
|
|
private activeGraphDbType: GraphDBType = 'arangodb';
|
|
|
|
private constructor() {
|
|
this.graphDBService = GraphDBService.getInstance();
|
|
this.pineconeService = QdrantService.getInstance();
|
|
|
|
// Use environment variables if available
|
|
if (process.env.SENTENCE_TRANSFORMER_URL) {
|
|
this.sentenceTransformerUrl = process.env.SENTENCE_TRANSFORMER_URL;
|
|
}
|
|
if (process.env.MODEL_NAME) {
|
|
this.modelName = process.env.MODEL_NAME;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get the singleton instance of BackendService
|
|
*/
|
|
public static getInstance(): BackendService {
|
|
if (!BackendService.instance) {
|
|
BackendService.instance = new BackendService();
|
|
}
|
|
return BackendService.instance;
|
|
}
|
|
|
|
/**
|
|
* Initialize the backend services
|
|
* @param graphDbType - Type of graph database to use (neo4j or arangodb)
|
|
*/
|
|
public async initialize(graphDbType: GraphDBType = 'arangodb'): Promise<void> {
|
|
this.activeGraphDbType = graphDbType;
|
|
|
|
// Initialize Graph Database
|
|
if (!this.graphDBService.isInitialized()) {
|
|
try {
|
|
// Get the appropriate service based on type
|
|
const graphDbService = getGraphDbService(graphDbType);
|
|
|
|
// Try to get settings from server settings API first
|
|
let serverSettings: Record<string, string> = {};
|
|
try {
|
|
const response = await fetch('/api/settings');
|
|
if (response.ok) {
|
|
const data = await response.json();
|
|
serverSettings = data.settings || {};
|
|
console.log('Successfully loaded settings from server API');
|
|
}
|
|
} catch (error) {
|
|
console.log('Failed to load settings from server API, falling back to environment variables:', error);
|
|
}
|
|
|
|
if (graphDbType === 'neo4j') {
|
|
// Get Neo4j credentials from server settings first, then fallback to environment
|
|
const uri = serverSettings.neo4j_url || process.env.NEO4J_URI;
|
|
const username = serverSettings.neo4j_user || process.env.NEO4J_USER || process.env.NEO4J_USERNAME;
|
|
const password = serverSettings.neo4j_password || process.env.NEO4J_PASSWORD;
|
|
|
|
console.log(`Using Neo4j URI: ${uri}`);
|
|
await this.graphDBService.initialize('neo4j', uri, username, password);
|
|
} else {
|
|
// Prioritize environment variables over server settings for Docker deployments
|
|
const url = process.env.ARANGODB_URL || serverSettings.arango_url || 'http://localhost:8529';
|
|
const dbName = process.env.ARANGODB_DB || serverSettings.arango_db || 'txt2kg';
|
|
const username = process.env.ARANGODB_USER || serverSettings.arango_user;
|
|
const password = process.env.ARANGODB_PASSWORD || serverSettings.arango_password;
|
|
|
|
console.log(`Using ArangoDB URL: ${url}`);
|
|
console.log(`Using ArangoDB database: ${dbName}`);
|
|
await this.graphDBService.initialize('arangodb', url, username, password);
|
|
}
|
|
console.log(`${graphDbType} initialized successfully in backend service`);
|
|
} catch (error) {
|
|
console.error(`Failed to initialize ${graphDbType} in backend service:`, error);
|
|
if (process.env.NODE_ENV === 'development') {
|
|
console.log('Development mode: Continuing despite graph database initialization error');
|
|
} else {
|
|
throw new Error('Graph database service initialization failed');
|
|
}
|
|
}
|
|
}
|
|
|
|
// Initialize Pinecone
|
|
if (!this.pineconeService.isInitialized()) {
|
|
await this.pineconeService.initialize();
|
|
}
|
|
|
|
// Check if sentence-transformer service is available
|
|
try {
|
|
// Remove the check skip in development mode
|
|
const response = await axios.get(`${this.sentenceTransformerUrl}/health`);
|
|
console.log(`Connected to SentenceTransformer service: ${response.data.model}`);
|
|
this.initialized = true;
|
|
} catch (error) {
|
|
console.error(`Failed to connect to sentence-transformer service: ${error}`);
|
|
if (process.env.NODE_ENV === 'development') {
|
|
console.log('Development mode: Continuing despite sentence transformer error');
|
|
this.initialized = true;
|
|
} else {
|
|
throw new Error('Sentence transformer service is not available');
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if the backend is initialized
|
|
*/
|
|
public get isInitialized(): boolean {
|
|
return this.initialized && this.graphDBService.isInitialized();
|
|
}
|
|
|
|
/**
|
|
* Get the active graph database type
|
|
*/
|
|
public getGraphDbType(): GraphDBType {
|
|
return this.activeGraphDbType;
|
|
}
|
|
|
|
/**
|
|
* Generate embeddings using the sentence-transformer service
|
|
*/
|
|
private async generateEmbeddings(texts: string[]): Promise<number[][]> {
|
|
try {
|
|
const response = await axios.post(`${this.sentenceTransformerUrl}/embed`, {
|
|
texts,
|
|
batch_size: 32
|
|
});
|
|
|
|
return response.data.embeddings;
|
|
} catch (error) {
|
|
console.error(`Error generating embeddings: ${error}`);
|
|
throw new Error('Failed to generate embeddings');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Convert our triple format to database format
|
|
*/
|
|
private convertTriples(triples: Triple[]): { subject: string; predicate: string; object: string }[] {
|
|
return triples.map(triple => ({
|
|
subject: triple.subject,
|
|
predicate: triple.predicate,
|
|
object: triple.object
|
|
}));
|
|
}
|
|
|
|
/**
|
|
* Process and store triples in graph database and embeddings in Pinecone
|
|
*/
|
|
public async processTriples(triples: Triple[]): Promise<void> {
|
|
// Preprocess triples: lowercase and remove duplicates
|
|
const processedTriples = triples.map(triple => ({
|
|
subject: triple.subject.toLowerCase(),
|
|
predicate: triple.predicate.toLowerCase(),
|
|
object: triple.object.toLowerCase()
|
|
}));
|
|
|
|
// Remove duplicate triples
|
|
const uniqueTriples = Array.from(
|
|
new Map(processedTriples.map(triple => [JSON.stringify(triple), triple])).values()
|
|
);
|
|
|
|
console.log(`Processed ${triples.length} triples, removed ${triples.length - uniqueTriples.length} duplicates`);
|
|
|
|
// Store triples in graph database
|
|
console.log(`Storing triples in ${this.activeGraphDbType} database`);
|
|
await this.graphDBService.importTriples(this.convertTriples(uniqueTriples));
|
|
|
|
// Extract unique entities from triples
|
|
const entities = new Set<string>();
|
|
for (const triple of uniqueTriples) {
|
|
entities.add(triple.subject); // subject
|
|
entities.add(triple.object); // object
|
|
}
|
|
|
|
// Generate embeddings for entities in batches
|
|
const entityList = Array.from(entities);
|
|
const batchSize = 256;
|
|
const entityEmbeddings = new Map<string, number[]>();
|
|
const textContent = new Map<string, string>(); // Map for text content
|
|
|
|
console.log(`Generating embeddings for ${entityList.length} entities`);
|
|
|
|
for (let i = 0; i < entityList.length; i += batchSize) {
|
|
const batch = entityList.slice(i, i + batchSize);
|
|
console.log(`Processing batch ${Math.floor(i/batchSize) + 1}/${Math.ceil(entityList.length/batchSize)}`);
|
|
|
|
const embeddings = await this.generateEmbeddings(batch);
|
|
|
|
// Store in maps
|
|
for (let j = 0; j < batch.length; j++) {
|
|
entityEmbeddings.set(batch[j], embeddings[j]);
|
|
textContent.set(batch[j], batch[j]); // Store the entity name as text content
|
|
}
|
|
}
|
|
|
|
// Store embeddings and text content in Pinecone
|
|
await this.pineconeService.storeEmbeddings(entityEmbeddings, textContent);
|
|
|
|
console.log(`Backend processing complete: ${uniqueTriples.length} triples and ${entityList.length} entities stored using ${this.activeGraphDbType}`);
|
|
}
|
|
|
|
/**
|
|
* Perform a traditional query using direct pattern matching on the graph
|
|
* This bypasses the vector embeddings and uses text matching
|
|
*/
|
|
public async queryTraditional(queryText: string): Promise<Triple[]> {
|
|
console.log(`Performing traditional graph query: "${queryText}"`);
|
|
|
|
// Extract keywords from query
|
|
const keywords = this.extractKeywords(queryText);
|
|
console.log(`Extracted keywords: ${keywords.join(', ')}`);
|
|
|
|
// Filter out stop words
|
|
const filteredKeywords = keywords.filter(kw => !this.isStopWord(kw));
|
|
|
|
// If using ArangoDB, use its native graph traversal capabilities
|
|
if (this.activeGraphDbType === 'arangodb') {
|
|
console.log(`Using ArangoDB native graph traversal for keywords: ${filteredKeywords.join(', ')}`);
|
|
|
|
try {
|
|
const results = await this.graphDBService.graphTraversal(filteredKeywords, 2, 100);
|
|
console.log(`ArangoDB graph traversal found ${results.length} relevant triples`);
|
|
|
|
// Log top 10 results with confidence scores for debugging
|
|
console.log('Top 10 triples by confidence:');
|
|
results.slice(0, 10).forEach((triple, idx) => {
|
|
console.log(` ${idx + 1}. [${triple.confidence.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth: ${triple.depth})`);
|
|
});
|
|
|
|
return results;
|
|
} catch (error) {
|
|
console.error('Error using ArangoDB graph traversal, falling back to traditional method:', error);
|
|
// Fall through to traditional method if ArangoDB traversal fails
|
|
}
|
|
}
|
|
|
|
// Fallback to traditional keyword matching for Neo4j or if ArangoDB traversal fails
|
|
console.log(`Using fallback keyword-based search`);
|
|
|
|
// Get graph data from graph database
|
|
const graphData = await this.graphDBService.getGraphData();
|
|
console.log(`Retrieved graph from ${this.activeGraphDbType} with ${graphData.nodes.length} nodes and ${graphData.relationships.length} relationships`);
|
|
|
|
// Create a map of node IDs to names
|
|
const nodeIdToName = new Map<string, string>();
|
|
for (const node of graphData.nodes) {
|
|
nodeIdToName.set(node.id, node.name);
|
|
}
|
|
|
|
// Find matching nodes based on keywords
|
|
const matchingNodeIds = new Set<string>();
|
|
for (const node of graphData.nodes) {
|
|
for (const keyword of filteredKeywords) {
|
|
// Simple text matching - convert to lowercase for case-insensitive matching
|
|
if (node.name.toLowerCase().includes(keyword.toLowerCase())) {
|
|
matchingNodeIds.add(node.id);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
console.log(`Found ${matchingNodeIds.size} nodes matching keywords directly`);
|
|
|
|
// Find relationships where either subject or object matches
|
|
const relevantTriples: Triple[] = [];
|
|
|
|
for (const rel of graphData.relationships) {
|
|
// Check if either end of the relationship matches our search
|
|
const isSourceMatching = matchingNodeIds.has(rel.source);
|
|
const isTargetMatching = matchingNodeIds.has(rel.target);
|
|
|
|
if (isSourceMatching || isTargetMatching) {
|
|
const sourceName = nodeIdToName.get(rel.source);
|
|
const targetName = nodeIdToName.get(rel.target);
|
|
|
|
if (sourceName && targetName) {
|
|
// Check if the relationship type matches keywords
|
|
let matchesRelationship = false;
|
|
for (const keyword of filteredKeywords) {
|
|
if (rel.type.toLowerCase().includes(keyword.toLowerCase())) {
|
|
matchesRelationship = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Higher relevance to relationships that match the query directly
|
|
const relevance = (isSourceMatching ? 1 : 0) +
|
|
(isTargetMatching ? 1 : 0) +
|
|
(matchesRelationship ? 2 : 0);
|
|
|
|
if (relevance > 0) {
|
|
relevantTriples.push({
|
|
subject: sourceName,
|
|
predicate: rel.type,
|
|
object: targetName,
|
|
confidence: relevance / 4.0 // Scale from 0 to 1
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sort by confidence (highest first)
|
|
relevantTriples.sort((a, b) =>
|
|
(b.confidence || 0) - (a.confidence || 0)
|
|
);
|
|
|
|
// Return all relevant triples, sorted by relevance
|
|
console.log(`Found ${relevantTriples.length} relevant triples with traditional search`);
|
|
return relevantTriples;
|
|
}
|
|
|
|
/**
|
|
* Extract keywords from query text
|
|
*/
|
|
private extractKeywords(text: string): string[] {
|
|
return text.toLowerCase()
|
|
.replace(/[.,?!;:()]/g, ' ') // Remove punctuation
|
|
.split(/\s+/) // Split by whitespace
|
|
.filter(word => word.length > 2); // Filter out very short words
|
|
}
|
|
|
|
/**
|
|
* Check if a word is a common stop word
|
|
*/
|
|
private isStopWord(word: string): boolean {
|
|
const stopWords = new Set([
|
|
'the', 'and', 'are', 'for', 'was', 'with',
|
|
'how', 'what', 'why', 'who', 'when', 'which',
|
|
'many', 'much', 'from', 'have', 'has', 'had',
|
|
'that', 'this', 'these', 'those', 'they', 'their'
|
|
]);
|
|
return stopWords.has(word.toLowerCase());
|
|
}
|
|
|
|
/**
|
|
* Query the backend for relevant information
|
|
*/
|
|
public async query(
|
|
queryText: string,
|
|
kNeighbors: number = 4096,
|
|
fanout: number = 400,
|
|
numHops: number = 2,
|
|
useTraditional: boolean = false
|
|
): Promise<Triple[]> {
|
|
console.log(`Querying backend with database type: ${this.activeGraphDbType}, useTraditional: ${useTraditional}`);
|
|
|
|
// If using traditional search, bypass the vector embeddings
|
|
if (useTraditional) {
|
|
return this.queryTraditional(queryText);
|
|
}
|
|
|
|
// Generate embedding for query
|
|
const queryEmbedding = (await this.generateEmbeddings([queryText]))[0];
|
|
|
|
// Find nearest neighbors using Pinecone
|
|
const seedNodes = await this.pineconeService.findSimilarEntities(queryEmbedding, kNeighbors);
|
|
console.log(`Found ${seedNodes.length} seed nodes for query: "${queryText}"`);
|
|
|
|
// Get graph data from graph database
|
|
const graphData = await this.graphDBService.getGraphData();
|
|
console.log(`Retrieved graph from ${this.activeGraphDbType} with ${graphData.nodes.length} nodes and ${graphData.relationships.length} relationships`);
|
|
|
|
// Build adjacency map for neighborhood exploration
|
|
const adjacencyMap = new Map<string, string[]>();
|
|
|
|
// Map Neo4j IDs to entity names
|
|
const nodeIdToName = new Map<string, string>();
|
|
for (const node of graphData.nodes) {
|
|
nodeIdToName.set(node.id, node.name);
|
|
adjacencyMap.set(node.name, []);
|
|
}
|
|
|
|
// Build adjacency lists
|
|
for (const rel of graphData.relationships) {
|
|
const sourceName = nodeIdToName.get(rel.source);
|
|
const targetName = nodeIdToName.get(rel.target);
|
|
|
|
if (sourceName && targetName) {
|
|
const neighbors = adjacencyMap.get(sourceName) || [];
|
|
neighbors.push(targetName);
|
|
adjacencyMap.set(sourceName, neighbors);
|
|
}
|
|
}
|
|
|
|
// Perform multi-hop exploration
|
|
const visitedNodes = new Set<string>(seedNodes);
|
|
const nodesToExplore = [...seedNodes];
|
|
|
|
for (let hop = 0; hop < numHops; hop++) {
|
|
const currentNodes = [...nodesToExplore];
|
|
nodesToExplore.length = 0; // Clear the array
|
|
|
|
for (const node of currentNodes) {
|
|
const neighbors = adjacencyMap.get(node) || [];
|
|
const limitedNeighbors = neighbors.slice(0, fanout);
|
|
|
|
for (const neighbor of limitedNeighbors) {
|
|
if (!visitedNodes.has(neighbor)) {
|
|
visitedNodes.add(neighbor);
|
|
nodesToExplore.push(neighbor);
|
|
}
|
|
}
|
|
}
|
|
|
|
console.log(`Hop ${hop+1}: Explored ${currentNodes.length} nodes, found ${nodesToExplore.length} new neighbors`);
|
|
}
|
|
|
|
// Extract relevant triples
|
|
const relevantTriples: Triple[] = [];
|
|
|
|
for (const rel of graphData.relationships) {
|
|
const sourceName = nodeIdToName.get(rel.source);
|
|
const targetName = nodeIdToName.get(rel.target);
|
|
|
|
if (sourceName && targetName &&
|
|
(visitedNodes.has(sourceName) || visitedNodes.has(targetName))) {
|
|
// Include relationship type from metadata
|
|
const predicate = rel.type === 'RELATIONSHIP' ? rel.type : rel.type;
|
|
relevantTriples.push({
|
|
subject: sourceName,
|
|
predicate: predicate,
|
|
object: targetName
|
|
});
|
|
}
|
|
}
|
|
|
|
// Apply local filtering (simplified version of PCST algorithm)
|
|
// Just return top N triples for simplicity
|
|
const topK = 5; // topk parameter from the Python example
|
|
|
|
console.log(`Found ${relevantTriples.length} relevant triples, returning top ${topK * 5}`);
|
|
return relevantTriples.slice(0, topK * 5);
|
|
}
|
|
|
|
/**
|
|
* Query with LLM enhancement: retrieve triples and use LLM to generate answer
|
|
* This makes traditional graph search comparable to RAG by adding LLM generation
|
|
* @param queryText - The user's question
|
|
* @param topK - Number of top triples to use as context (default 5)
|
|
* @param useTraditional - Whether to use traditional (keyword-based) or vector search
|
|
* @param llmModel - Optional LLM model to use (defaults to environment variable)
|
|
* @param llmProvider - Optional LLM provider (ollama, nvidia, etc.)
|
|
* @returns Generated answer from LLM based on retrieved triples
|
|
*/
|
|
public async queryWithLLM(
|
|
queryText: string,
|
|
topK: number = 5,
|
|
useTraditional: boolean = true,
|
|
llmModel?: string,
|
|
llmProvider?: string
|
|
): Promise<{ answer: string; triples: Triple[]; count: number }> {
|
|
console.log(`Querying with LLM enhancement: "${queryText}", topK=${topK}, traditional=${useTraditional}`);
|
|
|
|
// Step 1: Retrieve relevant triples using graph search
|
|
const allTriples = await this.query(queryText, 4096, 400, 2, useTraditional);
|
|
|
|
// Step 2: Take top K triples for context
|
|
const topTriples = allTriples.slice(0, topK);
|
|
console.log(`Using top ${topTriples.length} triples as context for LLM`);
|
|
|
|
// DEBUG: Log first triple to verify depth/pathLength are present
|
|
if (topTriples.length > 0) {
|
|
console.log('First triple structure:', JSON.stringify(topTriples[0], null, 2));
|
|
}
|
|
|
|
if (topTriples.length === 0) {
|
|
return {
|
|
answer: "I couldn't find any relevant information in the knowledge graph to answer this question.",
|
|
triples: [],
|
|
count: 0
|
|
};
|
|
}
|
|
|
|
// Step 3: Format triples as natural language context
|
|
const context = topTriples
|
|
.map(triple => {
|
|
// Convert triple to natural language
|
|
const predicate = triple.predicate
|
|
.replace(/_/g, ' ')
|
|
.replace(/-/g, ' ')
|
|
.toLowerCase();
|
|
return `${triple.subject} ${predicate} ${triple.object}`;
|
|
})
|
|
.join('. ');
|
|
|
|
// Step 4: Use LLM to generate answer from context
|
|
try {
|
|
// Simplified prompt to work better with NVIDIA Nemotron's natural reasoning format
|
|
const prompt = `Answer the question based on the following context from the knowledge graph.
|
|
|
|
Context:
|
|
${context}
|
|
|
|
Question: ${queryText}
|
|
|
|
Answer:`;
|
|
|
|
// Determine LLM endpoint and model based on provider
|
|
const finalProvider = llmProvider || 'ollama';
|
|
const finalModel = llmModel || process.env.OLLAMA_MODEL || 'llama3.1:8b';
|
|
|
|
console.log(`Using LLM: provider=${finalProvider}, model=${finalModel}`);
|
|
|
|
let response;
|
|
|
|
if (finalProvider === 'nvidia') {
|
|
// Use NVIDIA API
|
|
const nvidiaApiKey = process.env.NVIDIA_API_KEY;
|
|
if (!nvidiaApiKey) {
|
|
throw new Error('NVIDIA_API_KEY is required for NVIDIA provider. Please set the NVIDIA_API_KEY environment variable.');
|
|
}
|
|
|
|
const nvidiaUrl = 'https://integrate.api.nvidia.com/v1';
|
|
|
|
// Note: NVIDIA API doesn't support streaming in axios, so we'll use non-streaming
|
|
// and format the thinking content into <think> tags manually
|
|
response = await axios.post(`${nvidiaUrl}/chat/completions`, {
|
|
model: finalModel,
|
|
messages: [
|
|
{
|
|
role: 'system',
|
|
content: '/think' // Special NVIDIA API command to activate thinking mode
|
|
},
|
|
{
|
|
role: 'user',
|
|
content: prompt
|
|
}
|
|
],
|
|
temperature: 0.2,
|
|
max_tokens: 4096,
|
|
top_p: 0.95,
|
|
frequency_penalty: 0,
|
|
presence_penalty: 0,
|
|
stream: false, // We need non-streaming to get thinking tokens
|
|
// NVIDIA-specific thinking token parameters
|
|
min_thinking_tokens: 1024,
|
|
max_thinking_tokens: 2048
|
|
}, {
|
|
headers: {
|
|
'Authorization': `Bearer ${nvidiaApiKey}`,
|
|
'Content-Type': 'application/json'
|
|
},
|
|
timeout: 120000 // 120 second timeout
|
|
});
|
|
} else {
|
|
// Use Ollama (default)
|
|
const ollamaUrl = process.env.OLLAMA_BASE_URL || 'http://localhost:11434/v1';
|
|
|
|
response = await axios.post(`${ollamaUrl}/chat/completions`, {
|
|
model: finalModel,
|
|
messages: [
|
|
{
|
|
role: 'system',
|
|
content: 'You are a knowledgeable research assistant specializing in biomedical and scientific literature. Provide accurate, well-structured answers based on the provided context. Maintain a professional yet accessible tone, and clearly indicate when information is limited or uncertain.'
|
|
},
|
|
{
|
|
role: 'user',
|
|
content: prompt
|
|
}
|
|
],
|
|
temperature: 0.2, // Lower for more factual, consistent responses
|
|
max_tokens: 800 // Increased for more comprehensive answers
|
|
});
|
|
}
|
|
|
|
// Extract answer and reasoning (if using NVIDIA with thinking tokens)
|
|
const messageData = response.data.choices[0].message;
|
|
let answer = messageData.content || '';
|
|
|
|
// Check if NVIDIA API returned reasoning_content (thinking tokens)
|
|
if (finalProvider === 'nvidia' && messageData.reasoning_content) {
|
|
// Format with <think> tags for UI parsing
|
|
answer = `<think>\n${messageData.reasoning_content}\n</think>\n\n${answer}`;
|
|
console.log('Formatted response with thinking content');
|
|
}
|
|
|
|
// DEBUG: Log triples before returning to verify they still have depth/pathLength
|
|
console.log('Returning triples (first one):', JSON.stringify(topTriples[0], null, 2));
|
|
|
|
return {
|
|
answer,
|
|
triples: topTriples,
|
|
count: topTriples.length
|
|
};
|
|
} catch (error) {
|
|
console.error('Error calling LLM for answer generation:', error);
|
|
// Fallback: return triples without LLM enhancement
|
|
return {
|
|
answer: `Found ${topTriples.length} relevant triples:\n\n${context}`,
|
|
triples: topTriples,
|
|
count: topTriples.length
|
|
};
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Store document chunks for Pure RAG (separate from entity embeddings)
|
|
* This stores full text chunks rather than just entity names
|
|
* @param documents Array of document text chunks
|
|
* @param metadata Optional metadata for each document
|
|
*/
|
|
public async storeDocumentChunks(
|
|
documents: string[],
|
|
metadata?: Record<string, any>[]
|
|
): Promise<void> {
|
|
console.log(`Storing ${documents.length} document chunks for Pure RAG`);
|
|
|
|
// Generate embeddings for document chunks
|
|
const embeddings = await this.generateEmbeddings(documents);
|
|
|
|
// Store in Qdrant document-embeddings collection
|
|
await this.pineconeService.storeDocumentChunks(documents, embeddings, metadata);
|
|
|
|
console.log(`✅ Stored ${documents.length} document chunks in document-embeddings collection`);
|
|
}
|
|
|
|
/**
|
|
* Close connections to backend services
|
|
*/
|
|
public async close(): Promise<void> {
|
|
if (this.graphDBService.isInitialized()) {
|
|
this.graphDBService.close();
|
|
}
|
|
console.log('Backend service closed');
|
|
}
|
|
}
|
|
|
|
export default BackendService.getInstance();
|