Implement multi-hop graph traversal with depth tracking

- Extract ALL edges from graph traversal paths, not just endpoints - Add depth field (edge position in path: 0, 1, 2...) - Add pathLength field (total edges in path) - Use numeric index iteration for AQL compatibility - Apply depth penalty to edge scoring (earlier edges weighted higher) - Enable visualization of knowledge chains in graph queries - Increase topK default to 40 for richer multi-hop context This allows Traditional Graph to show how information is connected across multiple hops in the knowledge graph, similar to GraphRAG. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2026-04-23 10:33:51 +00:00 · 2025-10-25 13:48:52 -07:00 · 2025-10-25 13:48:52 -07:00 · 7742a9f0de
commit 7742a9f0de
parent 69cd444ea7
1 changed files with 173 additions and 1 deletions
--- a/nvidia/txt2kg/assets/frontend/lib/arangodb.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/arangodb.ts
@ -490,6 +490,178 @@ export class ArangoDBService {
    }
  }

+  /**
+   * Perform graph traversal to find relevant triples using ArangoDB's native graph capabilities
+   * @param keywords - Array of keywords to search for
+   * @param maxDepth - Maximum traversal depth (default: 2)
+   * @param maxResults - Maximum number of results to return (default: 100)
+   * @returns Promise resolving to array of triples with relevance scores
+   */
+  public async graphTraversal(
+    keywords: string[],
+    maxDepth: number = 2,
+    maxResults: number = 100
+  ): Promise<Array<{
+    subject: string;
+    predicate: string;
+    object: string;
+    confidence: number;
+    depth?: number;
+  }>> {
+    console.log(`[ArangoDB] graphTraversal called with keywords: ${keywords.join(', ')}`);
+
+    if (!this.db) {
+      throw new Error('ArangoDB connection not initialized. Call initialize() first.');
+    }
+
+    try {
+      // Build case-insensitive keyword matching conditions
+      const keywordConditions = keywords
+        .filter(kw => kw.length > 2)  // Filter short words
+        .map(kw => kw.toLowerCase());
+
+      if (keywordConditions.length === 0) {
+        return [];
+      }
+
+      // AQL query that:
+      // 1. Finds seed nodes matching keywords
+      // 2. Performs graph traversal from those nodes
+      // 3. Scores results based on keyword matches and depth
+      const query = `
+        // Find all entities matching keywords (case-insensitive)
+        LET seedNodes = (
+          FOR entity IN ${this.collectionName}
+            LET lowerName = LOWER(entity.name)
+            LET matches = (
+              FOR keyword IN @keywords
+                FILTER CONTAINS(lowerName, keyword)
+                RETURN 1
+            )
+            FILTER LENGTH(matches) > 0
+            RETURN {
+              node: entity,
+              matchCount: LENGTH(matches)
+            }
+        )
+
+        // Perform graph traversal from seed nodes
+        // Multi-hop: Extract ALL edges in each path, not just the final edge
+        LET traversalResults = (
+          FOR seed IN seedNodes
+            FOR v, e, p IN 0..@maxDepth ANY seed.node._id ${this.edgeCollectionName}
+              OPTIONS {uniqueVertices: 'global', bfs: true}
+              FILTER e != null
+
+              // Extract all edges from the path for multi-hop context
+              LET pathEdges = (
+                FOR edgeIdx IN 0..(LENGTH(p.edges) - 1)
+                  LET pathEdge = p.edges[edgeIdx]
+                  LET subjectEntity = DOCUMENT(pathEdge._from)
+                  LET objectEntity = DOCUMENT(pathEdge._to)
+                  LET subjectLower = LOWER(subjectEntity.name)
+                  LET objectLower = LOWER(objectEntity.name)
+                  LET predicateLower = LOWER(pathEdge.type)
+
+                  // Calculate score for this edge
+                  LET subjectMatches = (
+                    FOR kw IN @keywords
+                      FILTER CONTAINS(subjectLower, kw)
+                      LET isExact = (subjectLower == kw)
+                      RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
+                  )
+                  LET objectMatches = (
+                    FOR kw IN @keywords
+                      FILTER CONTAINS(objectLower, kw)
+                      LET isExact = (objectLower == kw)
+                      RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
+                  )
+                  LET predicateMatches = (
+                    FOR kw IN @keywords
+                      FILTER CONTAINS(predicateLower, kw)
+                      LET isExact = (predicateLower == kw)
+                      RETURN isExact ? 50 : (LENGTH(kw) * LENGTH(kw))
+                  )
+
+                  LET totalScore = SUM(subjectMatches) + SUM(objectMatches) + SUM(predicateMatches)
+
+                  // Depth penalty (edges earlier in path get slight boost)
+                  LET depthPenalty = 1.0 / (1.0 + (edgeIdx * 0.1))
+
+                  LET confidence = MIN([totalScore * depthPenalty / 1000.0, 1.0])
+
+                  FILTER confidence > 0
+
+                  RETURN {
+                    subject: subjectEntity.name,
+                    predicate: pathEdge.type,
+                    object: objectEntity.name,
+                    confidence: confidence,
+                    depth: edgeIdx,
+                    _edgeId: pathEdge._id,
+                    pathLength: LENGTH(p.edges)
+                  }
+              )
+
+              // Return all edges from this path
+              FOR pathTriple IN pathEdges
+                RETURN pathTriple
+        )
+
+        // Remove duplicates by edge ID and sort by confidence
+        LET uniqueResults = (
+          FOR result IN traversalResults
+            COLLECT edgeId = result._edgeId INTO groups
+            LET best = FIRST(
+              FOR g IN groups
+                SORT g.result.confidence DESC
+                RETURN g.result
+            )
+            RETURN best
+        )
+
+        // Sort by confidence and limit results
+        FOR result IN uniqueResults
+          SORT result.confidence DESC, result.depth ASC
+          LIMIT @maxResults
+          RETURN {
+            subject: result.subject,
+            predicate: result.predicate,
+            object: result.object,
+            confidence: result.confidence,
+            depth: result.depth,
+            pathLength: result.pathLength
+          }
+      `;
+
+      console.log(`[ArangoDB] Executing query with ${keywordConditions.length} keywords`);
+
+      const results = await this.executeQuery(query, {
+        keywords: keywordConditions,
+        maxDepth,
+        maxResults
+      });
+
+      console.log(`[ArangoDB] Multi-hop graph traversal found ${results.length} triples for keywords: ${keywords.join(', ')}`);
+
+      // Log top 10 results with confidence scores
+      if (results.length > 0) {
+        console.log('[ArangoDB] Top 10 triples by confidence (multi-hop):');
+        results.slice(0, 10).forEach((triple: any, idx: number) => {
+          const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
+          console.log(`  ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
+        });
+      } else {
+        console.log('[ArangoDB] No triples found!');
+      }
+
+      return results;
+    } catch (error) {
+      console.error('Error performing graph traversal in ArangoDB:', error);
+      throw error;
+    }
+  }
+
  /**
   * Get basic info about the ArangoDB connection
   */
@ -497,7 +669,7 @@ export class ArangoDBService {
    if (!this.db) {
      return { status: 'not connected' };
    }
-    
+
    return {
      status: 'connected',
      url: this.db.url,