Implement multi-hop graph traversal with depth tracking

- Extract ALL edges from graph traversal paths, not just endpoints
- Add depth field (edge position in path: 0, 1, 2...)
- Add pathLength field (total edges in path)
- Use numeric index iteration for AQL compatibility
- Apply depth penalty to edge scoring (earlier edges weighted higher)
- Enable visualization of knowledge chains in graph queries
- Increase topK default to 40 for richer multi-hop context

This allows Traditional Graph to show how information is connected
across multiple hops in the knowledge graph, similar to GraphRAG.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Santosh Bhavani 2025-10-25 13:48:52 -07:00
parent 69cd444ea7
commit 7742a9f0de

View File

@ -490,6 +490,178 @@ export class ArangoDBService {
}
}
/**
 * Find triples related to a set of keywords via an AQL graph traversal.
 *
 * Entities whose lower-cased `name` contains any keyword act as seed nodes.
 * From each seed the edge collection is traversed in ANY direction up to
 * `maxDepth` hops, and EVERY edge on each discovered path (not only the last
 * one) is scored and returned, so callers can reconstruct multi-hop chains.
 *
 * Scoring per edge:
 * - subject/object name: 1000 for an exact keyword match, otherwise
 *   `LENGTH(keyword)^2` for a substring match;
 * - predicate (`edge.type`): 50 for an exact match, otherwise `LENGTH(keyword)^2`;
 * - the summed score is multiplied by `1 / (1 + 0.1 * depth)` and divided by
 *   1000, then capped at 1.0 to give `confidence`. Edges scoring 0 are dropped.
 *
 * An edge reached through several paths is reported once, keeping the
 * occurrence with the highest confidence.
 *
 * @param keywords - Search keywords. They are trimmed, lower-cased and
 *   de-duplicated; keywords of two characters or fewer are ignored.
 * @param maxDepth - Maximum traversal depth (default: 2)
 * @param maxResults - Maximum number of results to return (default: 100)
 * @returns Triples sorted by confidence (descending) then depth (ascending).
 *   `depth` is the edge's zero-based position in its path and `pathLength` is
 *   the number of edges in that path. Resolves to `[]` when no usable keyword
 *   remains after filtering.
 * @throws Error if the database connection has not been initialized; errors
 *   raised while executing the query are logged and re-thrown.
 */
public async graphTraversal(
  keywords: string[],
  maxDepth: number = 2,
  maxResults: number = 100
): Promise<Array<{
  subject: string;
  predicate: string;
  object: string;
  confidence: number;
  depth?: number;
  pathLength?: number;
}>> {
  // Shape of one row returned by the AQL query below (mirrors the declared return type).
  type TraversalTriple = {
    subject: string;
    predicate: string;
    object: string;
    confidence: number;
    depth?: number;
    pathLength?: number;
  };

  console.log(`[ArangoDB] graphTraversal called with keywords: ${keywords.join(', ')}`);
  if (!this.db) {
    throw new Error('ArangoDB connection not initialized. Call initialize() first.');
  }
  try {
    // Normalize keywords for case-insensitive matching:
    //  - trim first so padding neither defeats the length filter nor the CONTAINS match
    //  - drop very short words
    //  - de-duplicate after lower-casing, otherwise a repeated keyword is counted
    //    once per occurrence by every scoring loop in the query
    const keywordConditions = Array.from(
      new Set(
        keywords
          .map(kw => kw.trim())
          .filter(kw => kw.length > 2) // Filter short words
          .map(kw => kw.toLowerCase())
      )
    );
    if (keywordConditions.length === 0) {
      return [];
    }
    // AQL query that:
    // 1. Finds seed nodes matching keywords
    // 2. Performs graph traversal from those nodes
    // 3. Scores results based on keyword matches and depth
    const query = `
      // Find all entities matching keywords (case-insensitive)
      LET seedNodes = (
        FOR entity IN ${this.collectionName}
          LET lowerName = LOWER(entity.name)
          LET matches = (
            FOR keyword IN @keywords
              FILTER CONTAINS(lowerName, keyword)
              RETURN 1
          )
          FILTER LENGTH(matches) > 0
          RETURN {
            node: entity,
            matchCount: LENGTH(matches)
          }
      )
      // Perform graph traversal from seed nodes
      // Multi-hop: Extract ALL edges in each path, not just the final edge
      LET traversalResults = (
        FOR seed IN seedNodes
          FOR v, e, p IN 0..@maxDepth ANY seed.node._id ${this.edgeCollectionName}
            OPTIONS {uniqueVertices: 'global', bfs: true}
            // Depth 0 yields the seed itself with no edge; skip it
            FILTER e != null
            // Extract all edges from the path for multi-hop context
            LET pathEdges = (
              FOR edgeIdx IN 0..(LENGTH(p.edges) - 1)
                LET pathEdge = p.edges[edgeIdx]
                LET subjectEntity = DOCUMENT(pathEdge._from)
                LET objectEntity = DOCUMENT(pathEdge._to)
                LET subjectLower = LOWER(subjectEntity.name)
                LET objectLower = LOWER(objectEntity.name)
                LET predicateLower = LOWER(pathEdge.type)
                // Calculate score for this edge
                LET subjectMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(subjectLower, kw)
                    LET isExact = (subjectLower == kw)
                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                )
                LET objectMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(objectLower, kw)
                    LET isExact = (objectLower == kw)
                    RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                )
                LET predicateMatches = (
                  FOR kw IN @keywords
                    FILTER CONTAINS(predicateLower, kw)
                    LET isExact = (predicateLower == kw)
                    RETURN isExact ? 50 : (LENGTH(kw) * LENGTH(kw))
                )
                LET totalScore = SUM(subjectMatches) + SUM(objectMatches) + SUM(predicateMatches)
                // Depth penalty (edges earlier in path get slight boost)
                LET depthPenalty = 1.0 / (1.0 + (edgeIdx * 0.1))
                LET confidence = MIN([totalScore * depthPenalty / 1000.0, 1.0])
                FILTER confidence > 0
                RETURN {
                  subject: subjectEntity.name,
                  predicate: pathEdge.type,
                  object: objectEntity.name,
                  confidence: confidence,
                  depth: edgeIdx,
                  _edgeId: pathEdge._id,
                  pathLength: LENGTH(p.edges)
                }
            )
            // Return all edges from this path
            FOR pathTriple IN pathEdges
              RETURN pathTriple
      )
      // Remove duplicates by edge ID and sort by confidence
      LET uniqueResults = (
        FOR result IN traversalResults
          COLLECT edgeId = result._edgeId INTO groups
          LET best = FIRST(
            FOR g IN groups
              SORT g.result.confidence DESC
              RETURN g.result
          )
          RETURN best
      )
      // Sort by confidence and limit results
      FOR result IN uniqueResults
        SORT result.confidence DESC, result.depth ASC
        LIMIT @maxResults
        RETURN {
          subject: result.subject,
          predicate: result.predicate,
          object: result.object,
          confidence: result.confidence,
          depth: result.depth,
          pathLength: result.pathLength
        }
    `;
    console.log(`[ArangoDB] Executing query with ${keywordConditions.length} keywords`);
    const results: TraversalTriple[] = await this.executeQuery(query, {
      keywords: keywordConditions,
      maxDepth,
      maxResults
    });
    console.log(`[ArangoDB] Multi-hop graph traversal found ${results.length} triples for keywords: ${keywords.join(', ')}`);
    // Log top 10 results with confidence scores
    if (results.length > 0) {
      console.log('[ArangoDB] Top 10 triples by confidence (multi-hop):');
      results.slice(0, 10).forEach((triple, idx) => {
        const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
        // Rows come straight from the database, so stay defensive about a missing confidence
        console.log(` ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
      });
    } else {
      console.log('[ArangoDB] No triples found!');
    }
    return results;
  } catch (error) {
    console.error('Error performing graph traversal in ArangoDB:', error);
    throw error;
  }
}
/**
* Get basic info about the ArangoDB connection
*/
@ -497,7 +669,7 @@ export class ArangoDBService {
if (!this.db) {
return { status: 'not connected' };
}
return {
status: 'connected',
url: this.db.url,