mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 10:33:51 +00:00
744 lines
24 KiB
TypeScript
744 lines
24 KiB
TypeScript
//
|
|
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
import { Database, aql } from 'arangojs';
|
|
import { createHash } from 'crypto';
|
|
|
|
/**
|
|
* ArangoDB service for database operations
|
|
* Provides methods to connect to and interact with an ArangoDB database
|
|
*/
|
|
export class ArangoDBService {
|
|
private db: Database | null = null;
|
|
private static instance: ArangoDBService;
|
|
private collectionName: string = 'entities';
|
|
private edgeCollectionName: string = 'relationships';
|
|
private documentsCollectionName: string = 'processed_documents';
|
|
|
|
private constructor() {}
|
|
|
|
/**
|
|
* Generate a deterministic _key from input string using MD5 hash
|
|
* Uses Node.js built-in crypto module - truncated to 16 chars for compact keys
|
|
* @param input - String to hash
|
|
* @returns Hex-encoded hash string (16 chars, safe for ArangoDB _key)
|
|
*/
|
|
private generateKey(input: string): string {
|
|
return createHash('md5').update(input).digest('hex').slice(0, 16);
|
|
}
|
|
|
|
/**
|
|
* Generate a deterministic _key for an entity based on its name
|
|
* @param name - Entity name
|
|
* @returns Deterministic _key string
|
|
*/
|
|
private generateEntityKey(name: string): string {
|
|
return this.generateKey(name.toLowerCase().trim());
|
|
}
|
|
|
|
/**
|
|
* Generate a deterministic _key for an edge based on its endpoints and type
|
|
* @param fromKey - Source entity _key
|
|
* @param toKey - Target entity _key
|
|
* @param relationType - Relationship type/predicate
|
|
* @returns Deterministic _key string
|
|
*/
|
|
private generateEdgeKey(fromKey: string, toKey: string, relationType: string): string {
|
|
return this.generateKey(`${fromKey}|${relationType.toLowerCase().trim()}|${toKey}`);
|
|
}
|
|
|
|
/**
|
|
* Get the singleton instance of ArangoDBService
|
|
*/
|
|
public static getInstance(): ArangoDBService {
|
|
if (!ArangoDBService.instance) {
|
|
ArangoDBService.instance = new ArangoDBService();
|
|
}
|
|
return ArangoDBService.instance;
|
|
}
|
|
|
|
/**
|
|
* Initialize the ArangoDB connection
|
|
* @param url - ArangoDB connection URL (defaults to ARANGODB_URL env var or 'http://localhost:8529')
|
|
* @param databaseName - ArangoDB database name (defaults to ARANGODB_DB env var or 'txt2kg')
|
|
* @param username - ArangoDB username (defaults to ARANGODB_USER env var or 'root')
|
|
* @param password - ArangoDB password (defaults to ARANGODB_PASSWORD env var or '')
|
|
*/
|
|
public async initialize(url?: string, databaseName?: string, username?: string, password?: string): Promise<void> {
|
|
// Use provided values, or environment variables, or defaults
|
|
const connectionUrl = url || process.env.ARANGODB_URL || 'http://localhost:8529';
|
|
const dbName = databaseName || process.env.ARANGODB_DB || 'txt2kg';
|
|
const user = username || process.env.ARANGODB_USER || 'root';
|
|
const pass = password || process.env.ARANGODB_PASSWORD || '';
|
|
|
|
try {
|
|
// Initialize the database connection
|
|
this.db = new Database({
|
|
url: connectionUrl,
|
|
databaseName: dbName,
|
|
auth: { username: user, password: pass },
|
|
});
|
|
|
|
// Check if database exists, create if it doesn't
|
|
const dbExists = await this.db.exists();
|
|
if (!dbExists) {
|
|
console.log(`Database ${dbName} does not exist, creating it...`);
|
|
await this.db.createDatabase(dbName);
|
|
this.db.useDatabase(dbName);
|
|
}
|
|
|
|
// Check if collections exist, create if they don't
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
// Create entity collection if it doesn't exist
|
|
if (!collectionNames.includes(this.collectionName)) {
|
|
await this.db.createCollection(this.collectionName);
|
|
await this.db.collection(this.collectionName).ensureIndex({
|
|
name: 'inverted_index',
|
|
type: 'inverted',
|
|
fields: ['name'],
|
|
analyzer: 'text_en'
|
|
});
|
|
await this.db.createView(`${this.collectionName}_view`, {
|
|
type: 'search-alias',
|
|
indexes: [
|
|
{
|
|
collection: this.collectionName,
|
|
index: 'inverted_index'
|
|
}
|
|
]
|
|
});
|
|
}
|
|
|
|
// Create edge collection if it doesn't exist
|
|
if (!collectionNames.includes(this.edgeCollectionName)) {
|
|
await this.db.createEdgeCollection(this.edgeCollectionName);
|
|
await this.db.collection(this.edgeCollectionName).ensureIndex({
|
|
name: 'inverted_index',
|
|
type: 'inverted',
|
|
fields: ['type'],
|
|
analyzer: 'text_en'
|
|
});
|
|
await this.db.createView(`${this.edgeCollectionName}_view`, {
|
|
type: 'search-alias',
|
|
indexes: [
|
|
{
|
|
collection: this.edgeCollectionName,
|
|
index: 'inverted_index'
|
|
}
|
|
]
|
|
});
|
|
}
|
|
|
|
// Create documents collection if it doesn't exist
|
|
if (!collectionNames.includes(this.documentsCollectionName)) {
|
|
await this.db.createCollection(this.documentsCollectionName);
|
|
}
|
|
|
|
console.log('ArangoDB initialized successfully');
|
|
} catch (error) {
|
|
console.error('Failed to initialize ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if the database connection is initialized
|
|
*/
|
|
public isInitialized(): boolean {
|
|
return this.db !== null;
|
|
}
|
|
|
|
/**
|
|
* Close the ArangoDB connection
|
|
*/
|
|
public close(): void {
|
|
if (this.db) {
|
|
this.db = null;
|
|
console.log('ArangoDB connection closed');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Execute an AQL query
|
|
* @param query - AQL query string
|
|
* @param bindVars - Parameters for the query
|
|
* @returns Promise resolving to query results
|
|
*/
|
|
public async executeQuery(query: string, bindVars: Record<string, any> = {}): Promise<any[]> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const cursor = await this.db.query(query, bindVars);
|
|
return await cursor.all();
|
|
} catch (error) {
|
|
console.error('Error executing ArangoDB query:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Create a node in the graph database
|
|
* @param properties - Node properties
|
|
* @returns Promise resolving to the created node
|
|
*/
|
|
public async createNode(properties: Record<string, any>): Promise<any> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const collection = this.db.collection(this.collectionName);
|
|
const doc = { ...properties, _key: this.generateEntityKey(properties.name) }
|
|
return await collection.save(doc, { overwriteMode: 'update' });
|
|
} catch (error) {
|
|
console.error('Error creating node in ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Create a relationship between two nodes
|
|
* @param fromKey - Key of the start node
|
|
* @param toKey - Key of the end node
|
|
* @param relationType - Type of relationship
|
|
* @param properties - Relationship properties
|
|
* @returns Promise resolving to the created relationship
|
|
*/
|
|
public async createRelationship(
|
|
fromKey: string,
|
|
toKey: string,
|
|
relationType: string,
|
|
properties: Record<string, any> = {}
|
|
): Promise<any> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const edgeCollection = this.db.collection(this.edgeCollectionName);
|
|
const edgeData = {
|
|
_key: this.generateEdgeKey(fromKey, toKey, relationType),
|
|
_from: `${this.collectionName}/${fromKey}`,
|
|
_to: `${this.collectionName}/${toKey}`,
|
|
type: relationType,
|
|
...properties
|
|
};
|
|
return await edgeCollection.save(edgeData, { overwriteMode: 'update' });
|
|
} catch (error) {
|
|
console.error('Error creating relationship in ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Import triples (subject, predicate, object) into the graph database
|
|
* Batches inserts every 1000 documents by default
|
|
* @param triples - Array of triples to import
|
|
* @param batchSize - Number of documents to insert per batch (default: 1000)
|
|
* @returns Promise resolving when import is complete
|
|
*/
|
|
public async importTriples(
|
|
triples: { subject: string; predicate: string; object: string }[],
|
|
batchSize: number = 1000
|
|
): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
let entityBatch: Array<{ _key: string; name: string }> = [];
|
|
let edgeBatch: Array<{ _key: string; _from: string; _to: string; type: string }> = [];
|
|
|
|
const importEntities = async () => {
|
|
if (entityBatch.length === 0) return;
|
|
await this.db!.collection(this.collectionName).saveAll(entityBatch, { overwriteMode: 'ignore' });
|
|
console.log(`[ArangoDB] Imported ${entityBatch.length} entities`);
|
|
entityBatch = [];
|
|
};
|
|
|
|
const importEdges = async () => {
|
|
if (edgeBatch.length === 0) return;
|
|
await this.db!.collection(this.edgeCollectionName).saveAll(edgeBatch, { overwriteMode: 'ignore' });
|
|
console.log(`[ArangoDB] Imported ${edgeBatch.length} edges`);
|
|
edgeBatch = [];
|
|
};
|
|
|
|
try {
|
|
for (const triple of triples) {
|
|
const normalizedSubject = triple.subject.trim();
|
|
const normalizedPredicate = triple.predicate.trim();
|
|
const normalizedObject = triple.object.trim();
|
|
|
|
if (!normalizedSubject || !normalizedPredicate || !normalizedObject) {
|
|
console.warn('Skipping invalid triple:', triple);
|
|
continue;
|
|
}
|
|
|
|
const subjectKey = this.generateEntityKey(normalizedSubject);
|
|
const objectKey = this.generateEntityKey(normalizedObject);
|
|
const edgeKey = this.generateEdgeKey(subjectKey, objectKey, normalizedPredicate);
|
|
|
|
entityBatch.push({ _key: subjectKey, name: normalizedSubject });
|
|
entityBatch.push({ _key: objectKey, name: normalizedObject });
|
|
|
|
edgeBatch.push({
|
|
_key: edgeKey,
|
|
_from: `${this.collectionName}/${subjectKey}`,
|
|
_to: `${this.collectionName}/${objectKey}`,
|
|
type: normalizedPredicate
|
|
});
|
|
|
|
if (entityBatch.length >= batchSize) await importEntities();
|
|
if (edgeBatch.length >= batchSize) await importEdges();
|
|
}
|
|
|
|
// Flush remaining
|
|
await importEntities();
|
|
await importEdges();
|
|
|
|
console.log(`Successfully imported ${triples.length} triples into ArangoDB`);
|
|
} catch (error) {
|
|
console.error('Error importing triples into ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if a document has already been processed and stored in ArangoDB
|
|
* @param documentName - Name of the document to check
|
|
* @returns Promise resolving to true if document exists, false otherwise
|
|
*/
|
|
public async isDocumentProcessed(documentName: string): Promise<boolean> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
const collection = this.db.collection(this.documentsCollectionName);
|
|
const key = this.generateKey(documentName.trim());
|
|
return await collection.documentExists(key);
|
|
}
|
|
|
|
/**
|
|
* Mark a document as processed in ArangoDB
|
|
* @param documentName - Name of the document
|
|
* @param tripleCount - Number of triples stored for this document
|
|
* @returns Promise resolving when the document is marked as processed
|
|
*/
|
|
public async markDocumentAsProcessed(documentName: string, tripleCount: number): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const collection = this.db.collection(this.documentsCollectionName);
|
|
const doc = {
|
|
_key: this.generateKey(documentName.trim()),
|
|
documentName,
|
|
tripleCount,
|
|
processedAt: new Date().toISOString()
|
|
};
|
|
|
|
await collection.save(doc, { overwriteMode: 'replace' });
|
|
console.log(`Marked document "${documentName}" as processed with ${tripleCount} triples`);
|
|
} catch (error) {
|
|
console.error('Error marking document as processed:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get all processed documents from ArangoDB
|
|
* @returns Promise resolving to array of processed document names
|
|
*/
|
|
public async getProcessedDocuments(): Promise<string[]> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const documents = await this.executeQuery(
|
|
`FOR d IN ${this.documentsCollectionName} RETURN d.documentName`
|
|
);
|
|
return documents;
|
|
} catch (error) {
|
|
console.error('Error getting processed documents:', error);
|
|
return [];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get graph data in a format compatible with the existing application
|
|
* @returns Promise resolving to nodes and relationships
|
|
*/
|
|
public async getGraphData(): Promise<{
|
|
nodes: Array<{
|
|
id: string;
|
|
labels: string[];
|
|
[key: string]: any
|
|
}>;
|
|
relationships: Array<{
|
|
id: string;
|
|
source: string;
|
|
target: string;
|
|
type: string;
|
|
[key: string]: any
|
|
}>;
|
|
}> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Get all entities (nodes)
|
|
const entities = await this.executeQuery(
|
|
`FOR e IN ${this.collectionName} RETURN e`
|
|
);
|
|
|
|
// Get all relationships (edges)
|
|
const relationships = await this.executeQuery(
|
|
`FOR r IN ${this.edgeCollectionName} RETURN r`
|
|
);
|
|
|
|
// Format nodes in a way compatible with the application
|
|
const nodes = entities.map(entity => ({
|
|
id: entity._key,
|
|
labels: ['Entity'],
|
|
name: entity.name,
|
|
...entity
|
|
}));
|
|
|
|
// Format relationships in a way compatible with the application
|
|
const formattedRelationships = relationships.map(rel => {
|
|
const source = rel._from.split('/')[1];
|
|
const target = rel._to.split('/')[1];
|
|
|
|
return {
|
|
id: rel._key,
|
|
source,
|
|
target,
|
|
type: rel.type,
|
|
...rel
|
|
};
|
|
});
|
|
|
|
return {
|
|
nodes,
|
|
relationships: formattedRelationships
|
|
};
|
|
} catch (error) {
|
|
console.error('Error getting graph data from ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Log query information and metrics
|
|
*/
|
|
public async logQuery(
|
|
query: string,
|
|
queryMode: 'traditional' | 'vector-search' | 'pure-rag',
|
|
metrics: {
|
|
executionTimeMs: number;
|
|
relevanceScore?: number;
|
|
precision?: number;
|
|
recall?: number;
|
|
resultCount: number;
|
|
}
|
|
): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Create a queryLogs collection if it doesn't exist
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
if (!collectionNames.includes('queryLogs')) {
|
|
await this.db.createCollection('queryLogs');
|
|
}
|
|
|
|
// Store query log
|
|
const queryLog = {
|
|
query,
|
|
queryMode,
|
|
metrics,
|
|
timestamp: new Date().toISOString()
|
|
};
|
|
|
|
await this.db.collection('queryLogs').save(queryLog);
|
|
} catch (error) {
|
|
console.error('Error logging query to ArangoDB:', error);
|
|
// We don't want to throw here as query logging is non-critical
|
|
console.error('Query logging failed but continuing execution');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get query logs
|
|
* @param limit - Maximum number of logs to retrieve
|
|
* @returns Promise resolving to query logs
|
|
*/
|
|
public async getQueryLogs(limit: number = 100): Promise<any[]> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Check if queryLogs collection exists
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
if (!collectionNames.includes('queryLogs')) {
|
|
return [];
|
|
}
|
|
|
|
// Get logs sorted by timestamp
|
|
const logs = await this.executeQuery(
|
|
`FOR l IN queryLogs SORT l.timestamp DESC LIMIT @limit RETURN l`,
|
|
{ limit }
|
|
);
|
|
|
|
return logs;
|
|
} catch (error) {
|
|
console.error('Error getting query logs from ArangoDB:', error);
|
|
return [];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Perform graph traversal to find relevant triples using ArangoDB's native text search and graph capabilities
|
|
* Uses inverted indexes with BM25 scoring for efficient keyword matching
|
|
* @param keywords - Array of keywords to search for
|
|
* @param maxDepth - Maximum traversal depth (default: 2)
|
|
* @param maxResults - Maximum number of results to return (default: 100)
|
|
* @param maxSeeds - Maximum number of seed nodes/edges from text search (default: 50)
|
|
* @returns Promise resolving to array of triples with relevance scores
|
|
*/
|
|
public async graphTraversal(
|
|
keywords: string[],
|
|
maxDepth: number = 2,
|
|
maxResults: number = 100,
|
|
maxSeeds: number = 50
|
|
): Promise<Array<{
|
|
subject: string;
|
|
predicate: string;
|
|
object: string;
|
|
confidence: number;
|
|
depth?: number;
|
|
}>> {
|
|
console.log(`[ArangoDB] graphTraversal called with keywords: ${keywords.join(', ')}`);
|
|
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Build case-insensitive keyword matching conditions
|
|
const keywordConditions = keywords
|
|
.filter(kw => kw.length > 2) // Filter short words
|
|
.map(kw => kw.toLowerCase());
|
|
|
|
if (keywordConditions.length === 0) {
|
|
return [];
|
|
}
|
|
|
|
const query = `
|
|
// 1. Tokenize keywords using the same analyzer as the index
|
|
LET keywords_merged = CONCAT_SEPARATOR(" ", @keywords)
|
|
LET keywords_tokens = TOKENS(keywords_merged, "text_en")
|
|
|
|
// 2. Match for entity.name
|
|
LET seedNodes = (
|
|
FOR vertex IN ${this.collectionName}_view
|
|
SEARCH ANALYZER(vertex.name IN keywords_tokens, "text_en")
|
|
LET score = BM25(vertex)
|
|
SORT score DESC
|
|
LIMIT @maxSeeds
|
|
RETURN { vertex, score }
|
|
)
|
|
|
|
// 3. Match for relationship.type
|
|
LET seedEdges = (
|
|
FOR edge IN ${this.edgeCollectionName}_view
|
|
SEARCH ANALYZER(edge.type IN keywords_tokens, "text_en")
|
|
LET score = BM25(edge)
|
|
SORT score DESC
|
|
LIMIT @maxSeeds
|
|
RETURN { edge, score }
|
|
)
|
|
|
|
// 4. Normalize scores
|
|
LET maxNodeScore = MAX(seedNodes[*].score) || 1
|
|
LET maxEdgeScore = MAX(seedEdges[*].score) || 1
|
|
|
|
// 5. Traverse from seedNodes up to maxDepth
|
|
LET traversalResults = (
|
|
FOR seed IN seedNodes
|
|
FOR v, e, p IN 1..@maxDepth ANY seed.vertex ${this.edgeCollectionName}
|
|
OPTIONS { uniqueVertices: 'path', bfs: true }
|
|
|
|
LET subjectEntity = DOCUMENT(e._from)
|
|
LET objectEntity = DOCUMENT(e._to)
|
|
LET depth = LENGTH(p.edges) - 1
|
|
|
|
// Depth penalty: closer to seed = higher score
|
|
LET depthPenalty = 1.0 / (1.0 + depth * 0.2)
|
|
|
|
// Normalize seed score and apply depth penalty
|
|
LET normalizedSeedScore = seed.score / maxNodeScore
|
|
LET confidence = normalizedSeedScore * depthPenalty
|
|
|
|
RETURN {
|
|
subject: subjectEntity.name,
|
|
predicate: e.type,
|
|
object: objectEntity.name,
|
|
confidence: confidence,
|
|
depth: depth,
|
|
_edgeId: e._id,
|
|
pathLength: LENGTH(p.edges)
|
|
}
|
|
)
|
|
|
|
// 6. Collect triples from seedEdges (direct hits)
|
|
LET edgeResults = (
|
|
FOR seed IN seedEdges
|
|
LET subjectEntity = DOCUMENT(seed.edge._from)
|
|
LET objectEntity = DOCUMENT(seed.edge._to)
|
|
|
|
// Direct edge matches get a boost (depth 0)
|
|
LET normalizedScore = seed.score / maxEdgeScore
|
|
|
|
RETURN {
|
|
subject: subjectEntity.name,
|
|
predicate: seed.edge.type,
|
|
object: objectEntity.name,
|
|
confidence: normalizedScore * 1.2, // Boost direct edge matches
|
|
depth: 0,
|
|
_edgeId: seed.edge._id,
|
|
pathLength: 1
|
|
}
|
|
)
|
|
|
|
// 7. Combine traversalResults and edgeResults
|
|
LET combinedResults = APPEND(traversalResults, edgeResults)
|
|
|
|
// 8. Remove duplicates by edge ID and sort by confidence
|
|
LET uniqueResults = (
|
|
FOR result IN combinedResults
|
|
COLLECT edgeId = result._edgeId INTO groups
|
|
LET best = FIRST(
|
|
FOR g IN groups
|
|
SORT g.result.confidence DESC
|
|
RETURN g.result
|
|
)
|
|
RETURN best
|
|
)
|
|
|
|
// 9. Sort by confidence and limit results
|
|
FOR result IN uniqueResults
|
|
FILTER result != null
|
|
SORT result.confidence DESC, result.depth ASC
|
|
LIMIT @maxResults
|
|
RETURN {
|
|
subject: result.subject,
|
|
predicate: result.predicate,
|
|
object: result.object,
|
|
confidence: result.confidence,
|
|
depth: result.depth,
|
|
pathLength: result.pathLength
|
|
}
|
|
`;
|
|
|
|
console.log(`[ArangoDB] Executing query with ${keywordConditions.length} keywords`);
|
|
|
|
const results = await this.executeQuery(query, {
|
|
keywords: keywordConditions,
|
|
maxDepth,
|
|
maxResults,
|
|
maxSeeds
|
|
});
|
|
|
|
console.log(`[ArangoDB] Found ${results.length} triples for keywords: ${keywords.join(', ')}`);
|
|
|
|
// Log top 10 results with confidence scores
|
|
if (results.length > 0) {
|
|
console.log('[ArangoDB] Top 10 triples by confidence:');
|
|
results.slice(0, 10).forEach((triple: any, idx: number) => {
|
|
const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
|
|
console.log(` ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
|
|
});
|
|
} else {
|
|
console.log('[ArangoDB] No triples found!');
|
|
}
|
|
|
|
return results;
|
|
} catch (error) {
|
|
console.error('Error performing graph traversal in ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get basic info about the ArangoDB connection
|
|
*/
|
|
public getDriverInfo(): Record<string, any> {
|
|
if (!this.db) {
|
|
return { status: 'not connected' };
|
|
}
|
|
|
|
return {
|
|
status: 'connected',
|
|
url: this.db.url,
|
|
database: this.db.name
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Clear all data from the graph database
|
|
* @returns Promise resolving when all collections are cleared
|
|
*/
|
|
public async clearDatabase(): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Truncate the entities collection (nodes)
|
|
await this.db.collection(this.collectionName).truncate();
|
|
|
|
// Truncate the relationships collection (edges)
|
|
await this.db.collection(this.edgeCollectionName).truncate();
|
|
|
|
// Also clear query logs if they exist
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
if (collectionNames.includes('queryLogs')) {
|
|
await this.db.collection('queryLogs').truncate();
|
|
}
|
|
|
|
console.log('ArangoDB database cleared successfully');
|
|
} catch (error) {
|
|
console.error('Error clearing ArangoDB database:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
}
|