mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 10:33:51 +00:00
Implement multi-hop graph traversal with depth tracking
- Extract ALL edges from graph traversal paths, not just endpoints
- Add depth field (edge position in path: 0, 1, 2...)
- Add pathLength field (total edges in path)
- Use numeric index iteration for AQL compatibility
- Apply depth penalty to edge scoring (earlier edges weighted higher)
- Enable visualization of knowledge chains in graph queries
- Increase topK default to 40 for richer multi-hop context

This allows Traditional Graph to show how information is connected across multiple hops in the knowledge graph, similar to GraphRAG.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
69cd444ea7
commit
7742a9f0de
@ -490,6 +490,178 @@ export class ArangoDBService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Perform a multi-hop graph traversal to find triples relevant to a set of keywords,
 * using ArangoDB's native graph traversal.
 *
 * Keywords are trimmed, lower-cased and de-duplicated, and keywords of two characters
 * or fewer are dropped. If nothing remains, an empty array is returned without querying.
 *
 * Every edge on every traversed path is scored individually:
 * - subject/object name equal to a keyword: 1000 points; containing it: LENGTH(keyword)^2
 * - predicate equal to a keyword: 50 points; containing it: LENGTH(keyword)^2
 * - the sum is multiplied by 1 / (1 + 0.1 * edgeIndex) and divided by 1000, capped at 1.0
 *
 * Edges reached through several paths are reported once, keeping the highest-confidence
 * occurrence. Results are ordered by confidence (descending), then depth (ascending).
 *
 * @param keywords - Array of keywords to search for
 * @param maxDepth - Maximum traversal depth (default: 2)
 * @param maxResults - Maximum number of results to return (default: 100)
 * @returns Promise resolving to array of triples with relevance scores; `depth` is the
 *   edge's index within the path it was found on and `pathLength` is that path's edge count
 * @throws Error if the database connection has not been initialized; errors raised while
 *   executing the query are logged and rethrown
 */
public async graphTraversal(
  keywords: string[],
  maxDepth: number = 2,
  maxResults: number = 100
): Promise<Array<{
  subject: string;
  predicate: string;
  object: string;
  confidence: number;
  depth?: number;
  pathLength?: number;
}>> {
  console.log(`[ArangoDB] graphTraversal called with keywords: ${keywords.join(', ')}`);

  if (!this.db) {
    throw new Error('ArangoDB connection not initialized. Call initialize() first.');
  }

  try {
    // Normalize for case-insensitive matching. Trimming happens before the length
    // check so padding does not count, and de-duplication after lower-casing keeps
    // inputs such as "GPU" and "gpu" from being scored twice.
    const normalizedKeywords = [
      ...new Set(
        keywords
          .map(kw => kw.trim().toLowerCase())
          .filter(kw => kw.length > 2) // Filter short words
      ),
    ];

    if (normalizedKeywords.length === 0) {
      return [];
    }

    // AQL query that:
    // 1. Finds seed nodes matching keywords
    // 2. Performs graph traversal from those nodes
    // 3. Scores results based on keyword matches and depth
    const query = `
      // Find all entities matching keywords (case-insensitive)
      LET seedNodes = (
        FOR entity IN ${this.collectionName}
          LET lowerName = LOWER(entity.name)
          LET matches = (
            FOR keyword IN @keywords
              FILTER CONTAINS(lowerName, keyword)
              RETURN 1
          )
          FILTER LENGTH(matches) > 0
          RETURN entity._id
      )

      // Perform graph traversal from seed nodes
      // Multi-hop: Extract ALL edges in each path, not just the final edge
      LET traversalResults = (
        FOR seedId IN seedNodes
          FOR v, e, p IN 0..@maxDepth ANY seedId ${this.edgeCollectionName}
            OPTIONS {uniqueVertices: 'global', bfs: true}
            // Skip rows that carry no edge (e.g. the start vertex at depth 0)
            FILTER e != null

            // Extract all edges from the path for multi-hop context
            LET pathEdges = (
              FOR edgeIdx IN 0..(LENGTH(p.edges) - 1)
                LET pathEdge = p.edges[edgeIdx]
                LET subjectEntity = DOCUMENT(pathEdge._from)
                LET objectEntity = DOCUMENT(pathEdge._to)
                LET subjectLower = LOWER(subjectEntity.name)
                LET objectLower = LOWER(objectEntity.name)
                LET predicateLower = LOWER(pathEdge.type)

                // Calculate score for this edge
                LET subjectMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(subjectLower, kw)
                    LET isExact = (subjectLower == kw)
                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                )
                LET objectMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(objectLower, kw)
                    LET isExact = (objectLower == kw)
                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                )
                LET predicateMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(predicateLower, kw)
                    LET isExact = (predicateLower == kw)
                    RETURN isExact ? 50 : (LENGTH(kw) * LENGTH(kw))
                )

                LET totalScore = SUM(subjectMatches) + SUM(objectMatches) + SUM(predicateMatches)

                // Depth penalty (edges earlier in path get slight boost)
                LET depthPenalty = 1.0 / (1.0 + (edgeIdx * 0.1))

                LET confidence = MIN([totalScore * depthPenalty / 1000.0, 1.0])

                FILTER confidence > 0

                RETURN {
                  subject: subjectEntity.name,
                  predicate: pathEdge.type,
                  object: objectEntity.name,
                  confidence: confidence,
                  depth: edgeIdx,
                  _edgeId: pathEdge._id,
                  pathLength: LENGTH(p.edges)
                }
            )

            // Return all edges from this path
            FOR pathTriple IN pathEdges
              RETURN pathTriple
      )

      // Remove duplicates by edge ID and sort by confidence
      LET uniqueResults = (
        FOR result IN traversalResults
          COLLECT edgeId = result._edgeId INTO groups
          LET best = FIRST(
            FOR g IN groups
              SORT g.result.confidence DESC
              RETURN g.result
          )
          RETURN best
      )

      // Sort by confidence and limit results
      FOR result IN uniqueResults
        SORT result.confidence DESC, result.depth ASC
        LIMIT @maxResults
        RETURN {
          subject: result.subject,
          predicate: result.predicate,
          object: result.object,
          confidence: result.confidence,
          depth: result.depth,
          pathLength: result.pathLength
        }
    `;

    console.log(`[ArangoDB] Executing query with ${normalizedKeywords.length} keywords`);

    const results = await this.executeQuery(query, {
      keywords: normalizedKeywords,
      maxDepth,
      maxResults
    });

    console.log(`[ArangoDB] Multi-hop graph traversal found ${results.length} triples for keywords: ${keywords.join(', ')}`);

    // Log top 10 results with confidence scores
    if (results.length > 0) {
      console.log('[ArangoDB] Top 10 triples by confidence (multi-hop):');
      results.slice(0, 10).forEach((triple: any, idx: number) => {
        const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
        console.log(`  ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
      });
    } else {
      console.log('[ArangoDB] No triples found!');
    }

    return results;
  } catch (error) {
    // Log with context, then let the caller decide how to handle the failure.
    console.error('Error performing graph traversal in ArangoDB:', error);
    throw error;
  }
}
|
||||
|
||||
/**
|
||||
* Get basic info about the ArangoDB connection
|
||||
*/
|
||||
@ -497,7 +669,7 @@ export class ArangoDBService {
|
||||
if (!this.db) {
|
||||
return { status: 'not connected' };
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
status: 'connected',
|
||||
url: this.db.url,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user