mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 02:23:53 +00:00
445 lines
13 KiB
TypeScript
445 lines
13 KiB
TypeScript
import { Database, aql } from 'arangojs';
|
|
|
|
/**
|
|
* ArangoDB service for database operations
|
|
* Provides methods to connect to and interact with an ArangoDB database
|
|
*/
|
|
export class ArangoDBService {
|
|
private db: Database | null = null;
|
|
private static instance: ArangoDBService;
|
|
private collectionName: string = 'entities';
|
|
private edgeCollectionName: string = 'relationships';
|
|
|
|
private constructor() {}
|
|
|
|
/**
|
|
* Get the singleton instance of ArangoDBService
|
|
*/
|
|
public static getInstance(): ArangoDBService {
|
|
if (!ArangoDBService.instance) {
|
|
ArangoDBService.instance = new ArangoDBService();
|
|
}
|
|
return ArangoDBService.instance;
|
|
}
|
|
|
|
/**
|
|
* Initialize the ArangoDB connection
|
|
* @param url - ArangoDB connection URL (defaults to ARANGODB_URL env var or 'http://localhost:8529')
|
|
* @param databaseName - ArangoDB database name (defaults to ARANGODB_DB env var or 'txt2kg')
|
|
* @param username - ArangoDB username (defaults to ARANGODB_USER env var or 'root')
|
|
* @param password - ArangoDB password (defaults to ARANGODB_PASSWORD env var or '')
|
|
*/
|
|
public async initialize(url?: string, databaseName?: string, username?: string, password?: string): Promise<void> {
|
|
// Use provided values, or environment variables, or defaults
|
|
const connectionUrl = url || process.env.ARANGODB_URL || 'http://localhost:8529';
|
|
const dbName = databaseName || process.env.ARANGODB_DB || 'txt2kg';
|
|
const user = username || process.env.ARANGODB_USER || 'root';
|
|
const pass = password || process.env.ARANGODB_PASSWORD || '';
|
|
|
|
try {
|
|
// Initialize the database connection
|
|
this.db = new Database({
|
|
url: connectionUrl,
|
|
databaseName: dbName,
|
|
auth: { username: user, password: pass },
|
|
});
|
|
|
|
// Check if database exists, create if it doesn't
|
|
const dbExists = await this.db.exists();
|
|
if (!dbExists) {
|
|
console.log(`Database ${dbName} does not exist, creating it...`);
|
|
await this.db.createDatabase(dbName);
|
|
this.db.useDatabase(dbName);
|
|
}
|
|
|
|
// Check if collections exist, create if they don't
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
// Create entity collection if it doesn't exist
|
|
if (!collectionNames.includes(this.collectionName)) {
|
|
await this.db.createCollection(this.collectionName);
|
|
await this.db.collection(this.collectionName).ensureIndex({
|
|
type: 'persistent',
|
|
fields: ['name'],
|
|
unique: true
|
|
});
|
|
}
|
|
|
|
// Create edge collection if it doesn't exist
|
|
if (!collectionNames.includes(this.edgeCollectionName)) {
|
|
await this.db.createEdgeCollection(this.edgeCollectionName);
|
|
await this.db.collection(this.edgeCollectionName).ensureIndex({
|
|
type: 'persistent',
|
|
fields: ['type']
|
|
});
|
|
}
|
|
|
|
console.log('ArangoDB initialized successfully');
|
|
} catch (error) {
|
|
console.error('Failed to initialize ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if the database connection is initialized
|
|
*/
|
|
public isInitialized(): boolean {
|
|
return this.db !== null;
|
|
}
|
|
|
|
/**
|
|
* Close the ArangoDB connection
|
|
*/
|
|
public close(): void {
|
|
if (this.db) {
|
|
this.db = null;
|
|
console.log('ArangoDB connection closed');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Execute an AQL query
|
|
* @param query - AQL query string
|
|
* @param bindVars - Parameters for the query
|
|
* @returns Promise resolving to query results
|
|
*/
|
|
public async executeQuery(query: string, bindVars: Record<string, any> = {}): Promise<any[]> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const cursor = await this.db.query(query, bindVars);
|
|
return await cursor.all();
|
|
} catch (error) {
|
|
console.error('Error executing ArangoDB query:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Create a node in the graph database
|
|
* @param properties - Node properties
|
|
* @returns Promise resolving to the created node
|
|
*/
|
|
public async createNode(properties: Record<string, any>): Promise<any> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const collection = this.db.collection(this.collectionName);
|
|
return await collection.save(properties);
|
|
} catch (error) {
|
|
console.error('Error creating node in ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Create a relationship between two nodes
|
|
* @param fromKey - Key of the start node
|
|
* @param toKey - Key of the end node
|
|
* @param relationType - Type of relationship
|
|
* @param properties - Relationship properties
|
|
* @returns Promise resolving to the created relationship
|
|
*/
|
|
public async createRelationship(
|
|
fromKey: string,
|
|
toKey: string,
|
|
relationType: string,
|
|
properties: Record<string, any> = {}
|
|
): Promise<any> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
const edgeCollection = this.db.collection(this.edgeCollectionName);
|
|
const edgeData = {
|
|
_from: `${this.collectionName}/${fromKey}`,
|
|
_to: `${this.collectionName}/${toKey}`,
|
|
type: relationType,
|
|
...properties
|
|
};
|
|
return await edgeCollection.save(edgeData);
|
|
} catch (error) {
|
|
console.error('Error creating relationship in ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Import triples (subject, predicate, object) into the graph database
|
|
* @param triples - Array of triples to import
|
|
* @returns Promise resolving when import is complete
|
|
*/
|
|
public async importTriples(triples: { subject: string; predicate: string; object: string }[]): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Process triples in batches to improve performance
|
|
for (const triple of triples) {
|
|
// Normalize triple values
|
|
const normalizedSubject = triple.subject.trim();
|
|
const normalizedPredicate = triple.predicate.trim();
|
|
const normalizedObject = triple.object.trim();
|
|
|
|
// Skip invalid triples
|
|
if (!normalizedSubject || !normalizedPredicate || !normalizedObject) {
|
|
console.warn('Skipping invalid triple:', triple);
|
|
continue;
|
|
}
|
|
|
|
// Upsert subject and object nodes
|
|
const subjectNode = await this.upsertEntity(normalizedSubject);
|
|
const objectNode = await this.upsertEntity(normalizedObject);
|
|
|
|
// Check if relationship already exists
|
|
const existingEdges = await this.executeQuery(
|
|
`FOR e IN ${this.edgeCollectionName}
|
|
FILTER e._from == @from AND e._to == @to AND e.type == @type
|
|
RETURN e`,
|
|
{
|
|
from: `${this.collectionName}/${subjectNode._key}`,
|
|
to: `${this.collectionName}/${objectNode._key}`,
|
|
type: normalizedPredicate
|
|
}
|
|
);
|
|
|
|
// Create relationship if it doesn't exist
|
|
if (existingEdges.length === 0) {
|
|
await this.createRelationship(
|
|
subjectNode._key,
|
|
objectNode._key,
|
|
normalizedPredicate
|
|
);
|
|
}
|
|
}
|
|
|
|
console.log(`Successfully imported ${triples.length} triples into ArangoDB`);
|
|
} catch (error) {
|
|
console.error('Error importing triples into ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Helper method to upsert (create or update) an entity
|
|
* @param name - Entity name
|
|
* @returns Promise resolving to the entity
|
|
*/
|
|
private async upsertEntity(name: string): Promise<any> {
|
|
const collection = this.db!.collection(this.collectionName);
|
|
|
|
// Look for existing entity
|
|
const existing = await this.executeQuery(
|
|
`FOR e IN ${this.collectionName} FILTER e.name == @name RETURN e`,
|
|
{ name }
|
|
);
|
|
|
|
if (existing.length > 0) {
|
|
return existing[0];
|
|
}
|
|
|
|
// Create new entity
|
|
return await collection.save({ name });
|
|
}
|
|
|
|
/**
|
|
* Get graph data in a format compatible with the existing application
|
|
* @returns Promise resolving to nodes and relationships
|
|
*/
|
|
public async getGraphData(): Promise<{
|
|
nodes: Array<{
|
|
id: string;
|
|
labels: string[];
|
|
[key: string]: any
|
|
}>;
|
|
relationships: Array<{
|
|
id: string;
|
|
source: string;
|
|
target: string;
|
|
type: string;
|
|
[key: string]: any
|
|
}>;
|
|
}> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Get all entities (nodes)
|
|
const entities = await this.executeQuery(
|
|
`FOR e IN ${this.collectionName} RETURN e`
|
|
);
|
|
|
|
// Get all relationships (edges)
|
|
const relationships = await this.executeQuery(
|
|
`FOR r IN ${this.edgeCollectionName} RETURN r`
|
|
);
|
|
|
|
// Build id to key mapping for relationships
|
|
const idToKey = new Map<string, string>();
|
|
for (const entity of entities) {
|
|
idToKey.set(entity._id, entity._key);
|
|
}
|
|
|
|
// Format nodes in a way compatible with the application
|
|
const nodes = entities.map(entity => ({
|
|
id: entity._key,
|
|
labels: ['Entity'],
|
|
name: entity.name,
|
|
...entity
|
|
}));
|
|
|
|
// Format relationships in a way compatible with the application
|
|
const formattedRelationships = relationships.map(rel => {
|
|
// Extract the entity keys from _from and _to
|
|
const source = rel._from.split('/')[1];
|
|
const target = rel._to.split('/')[1];
|
|
|
|
return {
|
|
id: rel._key,
|
|
source,
|
|
target,
|
|
type: rel.type,
|
|
...rel
|
|
};
|
|
});
|
|
|
|
return {
|
|
nodes,
|
|
relationships: formattedRelationships
|
|
};
|
|
} catch (error) {
|
|
console.error('Error getting graph data from ArangoDB:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Log query information and metrics
|
|
*/
|
|
public async logQuery(
|
|
query: string,
|
|
queryMode: 'traditional' | 'vector-search' | 'pure-rag',
|
|
metrics: {
|
|
executionTimeMs: number;
|
|
relevanceScore?: number;
|
|
precision?: number;
|
|
recall?: number;
|
|
resultCount: number;
|
|
}
|
|
): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Create a queryLogs collection if it doesn't exist
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
if (!collectionNames.includes('queryLogs')) {
|
|
await this.db.createCollection('queryLogs');
|
|
}
|
|
|
|
// Store query log
|
|
const queryLog = {
|
|
query,
|
|
queryMode,
|
|
metrics,
|
|
timestamp: new Date().toISOString()
|
|
};
|
|
|
|
await this.db.collection('queryLogs').save(queryLog);
|
|
} catch (error) {
|
|
console.error('Error logging query to ArangoDB:', error);
|
|
// We don't want to throw here as query logging is non-critical
|
|
console.error('Query logging failed but continuing execution');
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get query logs
|
|
* @param limit - Maximum number of logs to retrieve
|
|
* @returns Promise resolving to query logs
|
|
*/
|
|
public async getQueryLogs(limit: number = 100): Promise<any[]> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Check if queryLogs collection exists
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
if (!collectionNames.includes('queryLogs')) {
|
|
return [];
|
|
}
|
|
|
|
// Get logs sorted by timestamp
|
|
const logs = await this.executeQuery(
|
|
`FOR l IN queryLogs SORT l.timestamp DESC LIMIT @limit RETURN l`,
|
|
{ limit }
|
|
);
|
|
|
|
return logs;
|
|
} catch (error) {
|
|
console.error('Error getting query logs from ArangoDB:', error);
|
|
return [];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get basic info about the ArangoDB connection
|
|
*/
|
|
public getDriverInfo(): Record<string, any> {
|
|
if (!this.db) {
|
|
return { status: 'not connected' };
|
|
}
|
|
|
|
return {
|
|
status: 'connected',
|
|
url: this.db.url,
|
|
database: this.db.name
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Clear all data from the graph database
|
|
* @returns Promise resolving when all collections are cleared
|
|
*/
|
|
public async clearDatabase(): Promise<void> {
|
|
if (!this.db) {
|
|
throw new Error('ArangoDB connection not initialized. Call initialize() first.');
|
|
}
|
|
|
|
try {
|
|
// Truncate the entities collection (nodes)
|
|
await this.db.collection(this.collectionName).truncate();
|
|
|
|
// Truncate the relationships collection (edges)
|
|
await this.db.collection(this.edgeCollectionName).truncate();
|
|
|
|
// Also clear query logs if they exist
|
|
const collections = await this.db.listCollections();
|
|
const collectionNames = collections.map(c => c.name);
|
|
|
|
if (collectionNames.includes('queryLogs')) {
|
|
await this.db.collection('queryLogs').truncate();
|
|
}
|
|
|
|
console.log('ArangoDB database cleared successfully');
|
|
} catch (error) {
|
|
console.error('Error clearing ArangoDB database:', error);
|
|
throw error;
|
|
}
|
|
}
|
|
}
|