Implement multi-hop graph traversal with depth tracking

- Extract ALL edges from graph traversal paths, not just endpoints
- Add depth field (edge position in path: 0, 1, 2...)
- Add pathLength field (total edges in path)
- Use numeric index iteration for AQL compatibility
- Apply depth penalty to edge scoring (earlier edges weighted higher)
- Enable visualization of knowledge chains in graph queries
- Increase topK default to 40 for richer multi-hop context

This allows Traditional Graph to show how information is connected across multiple hops in the knowledge graph, similar to GraphRAG.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-06-24 07:09:31 +00:00 · 2025-10-25 13:48:52 -07:00 · 2025-10-25 13:48:52 -07:00 · 7742a9f0de
commit 7742a9f0de
parent 69cd444ea7
1 changed files with 173 additions and 1 deletions
--- a/nvidia/txt2kg/assets/frontend/lib/arangodb.ts
+++ b/nvidia/txt2kg/assets/frontend/lib/arangodb.ts
@ -490,6 +490,178 @@ export class ArangoDBService {
    }
  }
  /**
   * Find triples related to a set of keywords by traversing the graph outward
   * from keyword-matching seed entities.
   *
   * How the query works:
   * 1. Seed entities are the documents of the entity collection whose
   *    lower-cased `name` contains at least one keyword.
   * 2. From every seed the edge collection is traversed breadth-first in ANY
   *    direction, up to `maxDepth` hops.
   * 3. Every edge of every path found (not only the final edge) is scored:
   *    an exact subject/object match is worth 1000, an exact predicate match
   *    50, and a substring match `LENGTH(keyword)^2`. The summed score is
   *    multiplied by a depth penalty `1 / (1 + 0.1 * edgeIndex)`, divided by
   *    1000 and capped at 1.0. Edges with a confidence of 0 are dropped.
   * 4. Results are de-duplicated by edge `_id` (keeping the highest
   *    confidence), sorted by confidence descending then depth ascending, and
   *    limited to `maxResults`.
   *
   * Keywords are trimmed, lower-cased and de-duplicated before being bound to
   * the query; keywords of two characters or fewer are ignored. De-duplication
   * matters because the scoring subqueries iterate over the bound keyword
   * list, so a repeated keyword would otherwise be counted more than once.
   *
   * @param keywords - Array of keywords to search for
   * @param maxDepth - Maximum traversal depth (default: 2)
   * @param maxResults - Maximum number of results to return (default: 100)
   * @returns Promise resolving to the matching triples. `depth` is the
   *   zero-based position of the edge within the path it was found on and
   *   `pathLength` is the number of edges in that path. Resolves to an empty
   *   array when no usable keyword remains after filtering.
   * @throws Error if the database connection has not been initialized; errors
   *   raised while executing the query are logged and rethrown.
   */
  public async graphTraversal(
    keywords: string[],
    maxDepth: number = 2,
    maxResults: number = 100
  ): Promise<Array<{
    subject: string;
    predicate: string;
    object: string;
    confidence: number;
    depth?: number;
    pathLength?: number;
  }>> {
    console.log(`[ArangoDB] graphTraversal called with keywords: ${keywords.join(', ')}`);
    if (!this.db) {
      throw new Error('ArangoDB connection not initialized. Call initialize() first.');
    }
    try {
      // Normalize keywords for case-insensitive matching:
      //  - trim so padded or whitespace-only input cannot slip past the length filter,
      //  - drop short words,
      //  - lower-case to match the LOWER(...) comparisons in the query,
      //  - de-duplicate so a repeated keyword is not scored twice.
      const keywordConditions = Array.from(
        new Set(
          keywords
            .map(kw => kw.trim())
            .filter(kw => kw.length > 2)  // Filter short words
            .map(kw => kw.toLowerCase())
        )
      );
      if (keywordConditions.length === 0) {
        return [];
      }
      // AQL query that:
      // 1. Finds seed nodes matching keywords
      // 2. Performs graph traversal from those nodes
      // 3. Scores results based on keyword matches and depth
      const query = `
        // Find all entities matching keywords (case-insensitive)
        LET seedNodes = (
          FOR entity IN ${this.collectionName}
            LET lowerName = LOWER(entity.name)
            LET matches = (
              FOR keyword IN @keywords
                FILTER CONTAINS(lowerName, keyword)
                RETURN 1
            )
            FILTER LENGTH(matches) > 0
            RETURN {
              node: entity,
              matchCount: LENGTH(matches)
            }
        )
        // Perform graph traversal from seed nodes
        // Multi-hop: Extract ALL edges in each path, not just the final edge
        LET traversalResults = (
          FOR seed IN seedNodes
            FOR v, e, p IN 0..@maxDepth ANY seed.node._id ${this.edgeCollectionName}
              OPTIONS {uniqueVertices: 'global', bfs: true}
              FILTER e != null
              // Extract all edges from the path for multi-hop context
              LET pathEdges = (
                FOR edgeIdx IN 0..(LENGTH(p.edges) - 1)
                  LET pathEdge = p.edges[edgeIdx]
                  LET subjectEntity = DOCUMENT(pathEdge._from)
                  LET objectEntity = DOCUMENT(pathEdge._to)
                  LET subjectLower = LOWER(subjectEntity.name)
                  LET objectLower = LOWER(objectEntity.name)
                  LET predicateLower = LOWER(pathEdge.type)
                  // Calculate score for this edge
                  LET subjectMatches = (
                    FOR kw IN @keywords
                      FILTER CONTAINS(subjectLower, kw)
                      LET isExact = (subjectLower == kw)
                      RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                  )
                  LET objectMatches = (
                    FOR kw IN @keywords
                      FILTER CONTAINS(objectLower, kw)
                      LET isExact = (objectLower == kw)
                      RETURN isExact ? 1000 : (LENGTH(kw) * LENGTH(kw))
                  )
                  LET predicateMatches = (
                    FOR kw IN @keywords
                      FILTER CONTAINS(predicateLower, kw)
                      LET isExact = (predicateLower == kw)
                      RETURN isExact ? 50 : (LENGTH(kw) * LENGTH(kw))
                  )
                  LET totalScore = SUM(subjectMatches) + SUM(objectMatches) + SUM(predicateMatches)
                  // Depth penalty (edges earlier in path get slight boost)
                  LET depthPenalty = 1.0 / (1.0 + (edgeIdx * 0.1))
                  LET confidence = MIN([totalScore * depthPenalty / 1000.0, 1.0])
                  FILTER confidence > 0
                  RETURN {
                    subject: subjectEntity.name,
                    predicate: pathEdge.type,
                    object: objectEntity.name,
                    confidence: confidence,
                    depth: edgeIdx,
                    _edgeId: pathEdge._id,
                    pathLength: LENGTH(p.edges)
                  }
              )
              // Return all edges from this path
              FOR pathTriple IN pathEdges
                RETURN pathTriple
        )
        // Remove duplicates by edge ID and sort by confidence
        LET uniqueResults = (
          FOR result IN traversalResults
            COLLECT edgeId = result._edgeId INTO groups
            LET best = FIRST(
              FOR g IN groups
                SORT g.result.confidence DESC
                RETURN g.result
            )
            RETURN best
        )
        // Sort by confidence and limit results
        FOR result IN uniqueResults
          SORT result.confidence DESC, result.depth ASC
          LIMIT @maxResults
          RETURN {
            subject: result.subject,
            predicate: result.predicate,
            object: result.object,
            confidence: result.confidence,
            depth: result.depth,
            pathLength: result.pathLength
          }
      `;
      console.log(`[ArangoDB] Executing query with ${keywordConditions.length} keywords`);
      const results = await this.executeQuery(query, {
        keywords: keywordConditions,
        maxDepth,
        maxResults
      });
      console.log(`[ArangoDB] Multi-hop graph traversal found ${results.length} triples for keywords: ${keywords.join(', ')}`);
      // Log top 10 results with confidence scores
      if (results.length > 0) {
        console.log('[ArangoDB] Top 10 triples by confidence (multi-hop):');
        results.slice(0, 10).forEach((triple: any, idx: number) => {
          const pathInfo = triple.pathLength ? ` path=${triple.pathLength}` : '';
          console.log(`  ${idx + 1}. [conf=${triple.confidence?.toFixed(3)}] ${triple.subject} -> ${triple.predicate} -> ${triple.object} (depth=${triple.depth}${pathInfo})`);
        });
      } else {
        console.log('[ArangoDB] No triples found!');
      }
      return results;
    } catch (error) {
      // Log with context, then propagate so callers can decide how to recover.
      console.error('Error performing graph traversal in ArangoDB:', error);
      throw error;
    }
  }
  /**
   * Get basic info about the ArangoDB connection
   */