mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-27 12:23:51 +00:00
Implement multi-hop graph traversal with depth tracking
- Extract ALL edges from graph traversal paths, not just endpoints
- Add depth field (edge position in path: 0, 1, 2...)
- Add pathLength field (total edges in path)
- Use numeric index iteration for AQL compatibility
- Apply depth penalty to edge scoring (earlier edges weighted higher)
- Enable visualization of knowledge chains in graph queries
- Increase topK default to 40 for richer multi-hop context

This allows Traditional Graph to show how information is connected across multiple hops in the knowledge graph, similar to GraphRAG.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
69cd444ea7
commit
7742a9f0de
@ -490,6 +490,178 @@ export class ArangoDBService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Perform a multi-hop graph traversal to find triples relevant to a set of keywords,
 * using ArangoDB's native graph traversal.
 *
 * The query works in four stages:
 *  1. Seed selection: every document in the entity collection whose lower-cased
 *     `name` contains at least one keyword becomes a seed node.
 *  2. Traversal: from each seed, edges are followed in ANY direction up to
 *     `maxDepth` hops (breadth-first, each vertex visited once globally).
 *  3. Path expansion: for every path found, ALL edges on the path are emitted as
 *     triples (not only the final edge), each scored against the keywords.
 *  4. De-duplication: triples are grouped by edge id, the highest-confidence
 *     occurrence is kept, and the result is sorted and limited.
 *
 * Scoring per edge: an exact keyword match on subject or object contributes 1000,
 * an exact match on the predicate contributes 50, and any other substring match
 * contributes `LENGTH(keyword)^2`. The sum is multiplied by a depth penalty of
 * `1 / (1 + 0.1 * depth)`, divided by 1000 and capped at 1.0. Edges with a
 * confidence of 0 are dropped.
 *
 * @param keywords - Keywords to search for. They are trimmed, lower-cased and
 *   de-duplicated; keywords of two characters or fewer are ignored.
 * @param maxDepth - Maximum traversal depth in hops (default: 2)
 * @param maxResults - Maximum number of triples to return (default: 100)
 * @returns Promise resolving to triples ordered by confidence (descending), then
 *   depth (ascending). `depth` is the zero-based position of the edge within its
 *   path and `pathLength` is the total number of edges in that path. Resolves to
 *   an empty array when no usable keyword remains after filtering.
 * @throws Error if the database connection has not been initialized. Errors raised
 *   while executing the query are logged and rethrown.
 */
public async graphTraversal(
  keywords: string[],
  maxDepth: number = 2,
  maxResults: number = 100
): Promise<Array<{
  subject: string;
  predicate: string;
  object: string;
  confidence: number;
  depth?: number;
  pathLength?: number;
}>> {
  // Shape of one row returned by the query below; keep in sync with the
  // return type declared in the signature.
  type TraversalTriple = {
    subject: string;
    predicate: string;
    object: string;
    confidence: number;
    depth?: number;
    pathLength?: number;
  };

  console.log(`[ArangoDB] graphTraversal called with keywords: ${keywords.join(', ')}`);

  if (!this.db) {
    throw new Error('ArangoDB connection not initialized. Call initialize() first.');
  }

  try {
    // Normalize keywords for case-insensitive matching. Trimming prevents
    // whitespace padding from defeating the short-word filter, and
    // de-duplication prevents a repeated keyword from being counted twice
    // by the SUM(...) scoring inside the query.
    const keywordConditions = Array.from(
      new Set(
        keywords
          .map(kw => kw.trim())
          .filter(kw => kw.length > 2) // Filter short words
          .map(kw => kw.toLowerCase())
      )
    );

    if (keywordConditions.length === 0) {
      return [];
    }

    // AQL query that:
    // 1. Finds seed nodes matching keywords
    // 2. Performs graph traversal from those nodes
    // 3. Scores results based on keyword matches and depth
    const query = `
      // Find all entities matching keywords (case-insensitive)
      LET seedNodes = (
        FOR entity IN ${this.collectionName}
          LET lowerName = LOWER(entity.name)
          LET matches = (
            FOR keyword IN @keywords
              FILTER CONTAINS(lowerName, keyword)
              RETURN 1
          )
          FILTER LENGTH(matches) > 0
          RETURN {
            node: entity,
            matchCount: LENGTH(matches)
          }
      )

      // Perform graph traversal from seed nodes
      // Multi-hop: Extract ALL edges in each path, not just the final edge
      LET traversalResults = (
        FOR seed IN seedNodes
          FOR v, e, p IN 0..@maxDepth ANY seed.node._id ${this.edgeCollectionName}
            OPTIONS {uniqueVertices: 'global', bfs: true}
            FILTER e != null

            // Extract all edges from the path for multi-hop context
            LET pathEdges = (
              FOR edgeIdx IN 0..(LENGTH(p.edges) - 1)
                LET pathEdge = p.edges[edgeIdx]
                LET subjectEntity = DOCUMENT(pathEdge._from)
                LET objectEntity = DOCUMENT(pathEdge._to)
                LET subjectLower = LOWER(subjectEntity.name)
                LET objectLower = LOWER(objectEntity.name)
                LET predicateLower = LOWER(pathEdge.type)

                // Calculate score for this edge
                LET subjectMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(subjectLower, kw)
                    LET isExact = (subjectLower == kw)
                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                )
                LET objectMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(objectLower, kw)
                    LET isExact = (objectLower == kw)
                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                )
                LET predicateMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(predicateLower, kw)
                    LET isExact = (predicateLower == kw)
                    RETURN isExact ? 50 : (LENGTH(kw) * LENGTH(kw))
                )

                LET totalScore = SUM(subjectMatches) + SUM(objectMatches) + SUM(predicateMatches)

                // Depth penalty (edges earlier in path get slight boost)
                LET depthPenalty = 1.0 / (1.0 + (edgeIdx * 0.1))

                LET confidence = MIN([totalScore * depthPenalty / 1000.0, 1.0])

                FILTER confidence > 0

                RETURN {
                  subject: subjectEntity.name,
                  predicate: pathEdge.type,
                  object: objectEntity.name,
                  confidence: confidence,
                  depth: edgeIdx,
                  _edgeId: pathEdge._id,
                  pathLength: LENGTH(p.edges)
                }
            )

            // Return all edges from this path
            FOR pathTriple IN pathEdges
              RETURN pathTriple
      )

      // Remove duplicates by edge ID and sort by confidence
      LET uniqueResults = (
        FOR result IN traversalResults
          COLLECT edgeId = result._edgeId INTO groups
          LET best = FIRST(
            FOR g IN groups
              SORT g.result.confidence DESC
              RETURN g.result
          )
          RETURN best
      )

      // Sort by confidence and limit results
      FOR result IN uniqueResults
        SORT result.confidence DESC, result.depth ASC
        LIMIT @maxResults
        RETURN {
          subject: result.subject,
          predicate: result.predicate,
          object: result.object,
          confidence: result.confidence,
          depth: result.depth,
          pathLength: result.pathLength
        }
    `;

    console.log(`[ArangoDB] Executing query with ${keywordConditions.length} keywords`);

    const results: TraversalTriple[] = await this.executeQuery(query, {
      keywords: keywordConditions,
      maxDepth,
      maxResults
    });

    console.log(`[ArangoDB] Multi-hop graph traversal found ${results.length} triples for keywords: ${keywords.join(', ')}`);

    // Log top 10 results with confidence scores
    if (results.length > 0) {
      console.log('[ArangoDB] Top 10 triples by confidence (multi-hop):');
      results.slice(0, 10).forEach((triple: TraversalTriple, idx: number) => {
        const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
        // Rows come straight from the database and are not validated, so the
        // optional chaining on confidence is kept as a runtime guard.
        console.log(` ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
      });
    } else {
      console.log('[ArangoDB] No triples found!');
    }

    return results;
  } catch (error) {
    console.error('Error performing graph traversal in ArangoDB:', error);
    throw error;
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get basic info about the ArangoDB connection
|
* Get basic info about the ArangoDB connection
|
||||||
*/
|
*/
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user