// NOTE(review): This source appears to have been collapsed/garbled during extraction —
// many physical lines are fused together, and the JSX element markup in every render
// section has been stripped, leaving only loose text fragments. The TypeScript logic
// (state hooks, event listeners, async handlers) is intact and readable; the returned
// JSX cannot be reconstructed from this view, so it is preserved verbatim below.
// TODO(review): recover the original .tsx from version control before editing any JSX.
//
// File overview (grounded in the visible code only):
//  - EmbeddingsGenerator: top-level "use client" component. Wires useDocuments() context,
//    shift-click multi-select (useShiftSelect), an 'embeddings-settings-changed' window
//    listener, embedding generation (sequential contextGenerateEmbeddings calls), triple
//    extraction (processDocuments with LangChain / chunking options), and POST calls to
//    /api/stop-processing and /api/stop-embeddings.
//  - EmbeddingsContent: per-document embeddings table + status helpers (render garbled).
//  - RadioButton: labeled radio-input wrapper (its JSX body is missing entirely).
//  - TriplesContent: sortable document table with chunk-size/overlap/chunking-method
//    configuration and prompt-configuration loading from localStorage (render garbled).
//  - InfoIcon: inline SVG icon component (SVG body missing).
// // SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // "use client" import { useState, useEffect } from "react" import { useDocuments } from "@/contexts/document-context" import { Button } from "@/components/ui/button" import { Sparkles, Loader2, CheckCircle, AlertCircle, FileText, Zap, Cpu, X, ChevronUp, ChevronDown } from "lucide-react" import { Switch } from "@/components/ui/switch" import { Label } from "@/components/ui/label" import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip" import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs" import React from "react" import { AdvancedOptions } from "@/components/advanced-options"; import { PromptConfiguration, PromptConfigurations } from "@/components/prompt-configuration"; import { useShiftSelect } from "@/hooks/use-shift-select" interface EmbeddingsGeneratorProps { showTripleExtraction?: boolean; } type Document = { id: string; name: string; status: string; uploadStatus: string; size: string; triples?: any[]; embeddings?: { count: number; generated: Date; status: "New" | "Processing" | "Processed" | "Error"; error?: string; }; }; interface ContentProps { documents: Document[]; selectedDocs: string[]; handleSelectAll: () => void; handleItemClick: (item: Document, event?: React.MouseEvent) => void; isSelected: 
(itemId: string) => boolean; error: string | null; status: string; } interface EmbeddingsContentProps extends ContentProps { generateEmbeddings: () => void; isGenerating: boolean; useLangChain: boolean; setUseLangChain: (value: boolean) => void; useSentenceChunking: boolean; setUseSentenceChunking: (value: boolean) => void; embeddingsProvider: string; handleStopEmbeddings: () => void; } interface TriplesContentProps extends ContentProps { extractTriples: (promptConfigs?: PromptConfigurations) => void; isProcessing: boolean; useLangChain: boolean; setUseLangChain: (value: boolean) => void; useSentenceChunking: boolean; setUseSentenceChunking: (value: boolean) => void; useEntityExtraction: boolean; setUseEntityExtraction: (value: boolean) => void; error: string | null; status: string; handleStopProcessing: () => void; } export function EmbeddingsGenerator({ showTripleExtraction = false }: EmbeddingsGeneratorProps) { const { documents, processDocuments, generateEmbeddings: contextGenerateEmbeddings } = useDocuments() const [isGenerating, setIsGenerating] = useState(false) const [isProcessing, setIsProcessing] = useState(false) const [useLangChain, setUseLangChain] = useState(false) const [useSentenceChunking, setUseSentenceChunking] = useState(true) const [useEntityExtraction, setUseEntityExtraction] = useState(true) const [error, setError] = useState(null) const [status, setStatus] = useState("") const [langChainMethod, setLangChainMethod] = React.useState<'default' | 'graphtransformer'>( 'default' ); const [embeddingsProvider, setEmbeddingsProvider] = useState( typeof window !== 'undefined' ? 
// NOTE(review): SSR guard above — window is undefined on the server, so the provider
// initializer falls back to "local" until the client reads localStorage. The
// 'embeddings-settings-changed' window listener below keeps it in sync afterwards,
// and is removed in the effect's cleanup. useShiftSelect restricts selection to
// documents whose status is "New", "Processed", or "Error".
localStorage.getItem("embeddings_provider") || "local" : "local" ); // Use shift-select hook for document selection const { selectedItems: selectedDocs, setSelectedItems: setSelectedDocs, handleItemClick, handleSelectAll, isSelected } = useShiftSelect({ items: documents, getItemId: (doc) => doc.id, canSelect: (doc) => doc.status === "New" || doc.status === "Processed" || doc.status === "Error", onSelectionChange: (selectedIds) => { // Optional: handle selection change if needed } }) // Listen for embeddings settings changes useEffect(() => { const handleEmbeddingsSettingsChanged = () => { const updatedProvider = localStorage.getItem("embeddings_provider") || "local"; setEmbeddingsProvider(updatedProvider); console.log("Embeddings generator detected embeddings settings change:", updatedProvider); }; window.addEventListener('embeddings-settings-changed', handleEmbeddingsSettingsChanged); return () => { window.removeEventListener('embeddings-settings-changed', handleEmbeddingsSettingsChanged); }; }, []); // Handle tab navigation const handleTabChange = (tab: string) => { const tabElement = document.querySelector(`[data-value="${tab}"]`) if (tabElement && 'click' in tabElement) { (tabElement as HTMLElement).click() } } // When LangChain is toggled off, disable dependent options useEffect(() => { if (!useLangChain) { setUseSentenceChunking(false) setUseEntityExtraction(false) } // Dispatch custom event to update embedding model info in Processing Summary const event = new CustomEvent('langChainToggled', { detail: { useLangChain } }); window.dispatchEvent(event); }, [useLangChain]) // Simulate embedding generation const generateEmbeddings = async () => { if (selectedDocs.length === 0) { setError("Please select at least one document") return } setError(null) setIsGenerating(true) setStatus("Preparing documents for embedding generation...") try { // Process each selected document for (let i = 0; i < selectedDocs.length; i++) { const docId = selectedDocs[i]; const doc = 
// NOTE(review): the loop below awaits contextGenerateEmbeddings(docId) for each
// selected document sequentially (one at a time, not Promise.all), updating the
// status line per document; missing IDs are logged and skipped rather than aborting.
documents.find(d => d.id === docId); if (!doc) { console.error(`Document with ID ${docId} not found`); continue; } setStatus(`Generating embeddings for document ${i+1} of ${selectedDocs.length}: ${doc.name}`); await contextGenerateEmbeddings(docId); } setStatus("Embedding generation complete!"); setTimeout(() => { setIsGenerating(false); setStatus(""); }, 1500); } catch (error) { console.error("Error generating embeddings:", error); setError("Failed to generate embeddings. Please try again."); setIsGenerating(false); } } // Extract triples from documents const extractTriples = async (options?: PromptConfigurations & { chunkSize?: number; overlapSize?: number; chunkingMethod?: 'optimized' | 'pyg' }) => { if (selectedDocs.length === 0) { setError("Please select at least one document") return } setError(null) setIsProcessing(true) setStatus("Preparing documents for triple extraction...") // Set up a listener for the processing-complete event const handleProcessingComplete = () => { console.log("Processing complete event received in embeddings-generator"); setIsProcessing(false); setStatus(""); }; window.addEventListener('processing-complete', handleProcessingComplete); try { // Update the processing status display const docNames = selectedDocs.map(id => documents.find(d => d.id === id)?.name || 'Unknown' ).join(', '); // Determine the processing method based on selected model and options let processingMethod = 'default extractor'; try { const selectedModel = localStorage.getItem("selectedModel"); if (selectedModel) { const model = JSON.parse(selectedModel); if (model.provider === "ollama") { processingMethod = `Ollama ${model.model || 'qwen3:1.7b'}`; } else if (model.id?.startsWith("nvidia-")) { processingMethod = 'NVIDIA Nemotron'; } } } catch (e) { // Fallback to default if parsing fails } if (useLangChain) { processingMethod += langChainMethod === 'graphtransformer' ? 
// NOTE(review): processingMethod is a display-only label for the status line;
// the actual extraction backend is chosen by processDocuments via the
// useLangChain/useGraphTransformer flags passed below.
' with LLMGraphTransformer' : ' with LangChain'; } setStatus(`Processing ${selectedDocs.length} document(s): ${docNames} using ${processingMethod}`); // Call processDocuments with the selected document IDs and processing options const useGraphTransformer = useLangChain && langChainMethod === 'graphtransformer'; await processDocuments(selectedDocs, { useLangChain, useGraphTransformer, promptConfigs: options || undefined, chunkSize: options?.chunkSize, overlapSize: options?.overlapSize, chunkingMethod: options?.chunkingMethod }); // Navigate to the edit tab after processing is complete setTimeout(() => { // Clean up the event listener window.removeEventListener('processing-complete', handleProcessingComplete); // Navigate to the edit tab handleTabChange("edit"); }, 500); } catch (error) { console.error("Error processing documents:", error) setError("Failed to process documents. Please try again.") setIsProcessing(false) setStatus("") // Clean up the event listener window.removeEventListener('processing-complete', handleProcessingComplete); } } // Stop processing function const handleStopProcessing = async () => { try { const response = await fetch('/api/stop-processing', { method: 'POST', headers: { 'Content-Type': 'application/json', }, }); if (response.ok) { setStatus("Processing stopped by user"); setError(null); setIsProcessing(false); setIsGenerating(false); } else { setError("Failed to stop processing. Please try again."); } } catch (error) { console.error("Error stopping processing:", error); setError("Failed to stop processing. Please try again."); } } // Stop embeddings generation function const handleStopEmbeddings = async () => { try { const response = await fetch('/api/stop-embeddings', { method: 'POST', headers: { 'Content-Type': 'application/json', }, }); if (response.ok) { setStatus("Embeddings generation stopped by user"); setError(null); setIsGenerating(false); } else { setError("Failed to stop embeddings generation. 
Please try again."); } } catch (error) { console.error("Error stopping embeddings generation:", error); setError("Failed to stop embeddings generation. Please try again."); } } return (
// NOTE(review): the returned JSX of EmbeddingsGenerator is garbled from here — the
// element tags were stripped by extraction; only the showTripleExtraction ternary and
// tab labels survive. Do not edit the fragments below without the original file.
{showTripleExtraction ? ( Triple Extraction Embeddings ) : ( )}
// NOTE(review): end of EmbeddingsGenerator; EmbeddingsContent begins on the next line.
// Its status helpers (getEmbeddingsStatusIcon / getEmbeddingsStatusText) are intact:
// icon is keyed off doc.embeddings?.status defaulting to "New"; text shows
// "<count> vectors" when Processed, the raw status otherwise, or "Ready" when unset.
// Its returned JSX (the embeddings table) is garbled below.
) } // Embeddings content component function EmbeddingsContent({ documents, selectedDocs, handleSelectAll, handleItemClick, isSelected, generateEmbeddings, isGenerating, useLangChain, setUseLangChain, useSentenceChunking, setUseSentenceChunking, error, status, embeddingsProvider, handleStopEmbeddings }: EmbeddingsContentProps) { // Helper function to get embeddings status icon const getEmbeddingsStatusIcon = (doc: Document) => { // Use embeddings status if available, otherwise show 'New' const embeddingsStatus = doc.embeddings?.status || "New"; switch (embeddingsStatus) { case "New": return ; case "Processing": return ; case "Processed": return ; case "Error": return ; default: return ; } }; // Helper function to get embeddings status text const getEmbeddingsStatusText = (doc: Document) => { if (doc.embeddings?.status === "Processed") { return `${doc.embeddings.count} vectors`; } else if (doc.embeddings?.status) { return doc.embeddings.status; } else { return "Ready"; } }; return ( <>

Generate Embeddings

What are embeddings?

Embeddings are vector representations of your documents that enable semantic search and similarity matching between documents.

Generate vector embeddings for semantic search and document similarity

{/* Current embeddings provider indicator */}
Using: {embeddingsProvider === "nvidia" ? "NVIDIA API" : "Local Sentence Transformer"}

Processing Options

{useLangChain && (

Split documents into sentences for more accurate embeddings

)}
{error && (

{error}

)}
doc.uploadStatus === "Uploaded").length && documents.filter(doc => doc.uploadStatus === "Uploaded").length > 0} onChange={handleSelectAll} disabled={documents.length === 0 || isGenerating} /> {selectedDocs.length > 0 ? ( {selectedDocs.length} selected ) : ( Select all )}
{isGenerating && ( )}
{documents.length === 0 ? ( ) : ( documents.map((doc) => ( handleItemClick(doc, e)}> )) )}
Document Size Triple Status Embeddings Status
No documents available for embedding generation
e.stopPropagation()}> handleItemClick(doc, e)} disabled={isGenerating} /> {doc.name} {doc.size}
{doc.status === "New" && ( )} {doc.status === "Processing" && ( )} {doc.status === "Processed" && ( )} {doc.status === "Error" && ( )} {doc.status}
{getEmbeddingsStatusIcon(doc)} {getEmbeddingsStatusText(doc)} {doc.embeddings?.error && (

{doc.embeddings.error}

)}
{isGenerating && (
{status}
// NOTE(review): end of EmbeddingsContent. RadioButton follows on the next line — its
// entire JSX body is missing (`return ( );`); presumably it rendered an <input
// type="radio"> plus a label for {children} — confirm against the original file.
)} ) } // Add this function near the top of the file function RadioButton({ id, name, value, checked, onChange, disabled = false, children }: { id: string; name: string; value: string; checked: boolean; onChange: (e: React.ChangeEvent) => void; disabled?: boolean; children: React.ReactNode; }) { return (
// NOTE(review): TriplesContent begins on the next line. Intact logic: memoized sort of
// documents by name/size/status (size via parseFloat, asc/desc toggle), chunk
// configuration state (chunkSize=512, overlapSize=0, chunkingMethod='pyg' defaults),
// prompt configurations loaded from localStorage key "promptConfigurations" on mount,
// and handleExtractTriples merging promptConfigs with the chunk options.
); } // Triple extraction content component function TriplesContent({ documents, selectedDocs, handleSelectAll, handleItemClick, isSelected, extractTriples, isProcessing, useLangChain, setUseLangChain, useSentenceChunking, setUseSentenceChunking, useEntityExtraction, setUseEntityExtraction, error, status, handleStopProcessing }: TriplesContentProps) { // Add sorting state const [sortField, setSortField] = useState<'name' | 'size' | 'status'>('name') const [sortDirection, setSortDirection] = useState<'asc' | 'desc'>('asc') // Sort documents based on current sort field and direction const sortedDocuments = React.useMemo(() => { return [...documents].sort((a, b) => { let aValue: string | number let bValue: string | number switch (sortField) { case 'name': aValue = a.name.toLowerCase() bValue = b.name.toLowerCase() break case 'size': aValue = parseFloat(a.size) || 0 bValue = parseFloat(b.size) || 0 break case 'status': aValue = a.status bValue = b.status break default: aValue = a.name.toLowerCase() bValue = b.name.toLowerCase() } if (sortDirection === 'asc') { return aValue < bValue ? -1 : aValue > bValue ? 1 : 0 } else { return aValue > bValue ? -1 : aValue < bValue ? 1 : 0 } }) }, [documents, sortField, sortDirection]) // Handle column header click for sorting const handleSort = (field: 'name' | 'size' | 'status') => { if (sortField === field) { setSortDirection(sortDirection === 'asc' ? 
'desc' : 'asc') } else { setSortField(field) setSortDirection('asc') } } const [langChainMethod, setLangChainMethod] = React.useState<'default' | 'graphtransformer'>( 'default' ); const [promptConfigs, setPromptConfigs] = useState(null); // Chunk configuration state const [chunkSize, setChunkSize] = useState(512); const [overlapSize, setOverlapSize] = useState(0); const [chunkingMethod, setChunkingMethod] = useState<'optimized' | 'pyg'>('pyg'); // Handle radio button changes for LangChain method const handleLangChainMethodChange = (e: React.ChangeEvent) => { setLangChainMethod(e.target.value as 'default' | 'graphtransformer'); }; // Load prompt configurations from localStorage on component mount useEffect(() => { try { const savedConfigs = localStorage.getItem("promptConfigurations"); if (savedConfigs) { setPromptConfigs(JSON.parse(savedConfigs)); } } catch (err) { console.error("Error loading prompt configurations:", err); } }, []); // Handle prompt configuration changes const handlePromptConfigsChange = (configs: PromptConfigurations) => { setPromptConfigs(configs); }; // Update actual flag used by API based on both useLangChain and langChainMethod React.useEffect(() => { // This effect is used to monitor langChainMethod changes // The actual implementation of different methods is handled in the API }, [langChainMethod]); // Handle extract triples button click const handleExtractTriples = () => { const options = { ...(promptConfigs || {}), chunkSize, overlapSize, chunkingMethod }; extractTriples(options); }; return ( <>
// NOTE(review): the returned JSX of TriplesContent is garbled from here to the end of
// the component — element tags stripped; only text content, inline expressions, and
// event-handler fragments remain. Note the `{false && useLangChain && (` fragment
// near "LangChain Method": that subtree was deliberately disabled in the original.

Knowledge Graph Triple Extraction

Extract structured knowledge triples from documents for knowledge graph construction

Processing Options

{/* Hidden: Use LangChain toggle - LangChain is always used for triple extraction */} {/*
*/} {/*

Leverages LangChain for knowledge extraction from documents

*/} {false && useLangChain && (
LangChain Method
Default Extractor

Uses the standard LangChain extraction pipeline

LLMGraphTransformer

Uses LangChain's specialized graph structure transformer

Split documents into sentences for more accurate triple extraction

Automatically detect and extract entities from documents

)}
{/* Chunk Configuration */}
{/* Chunking Method Selection */}
setChunkingMethod(e.target.value as 'optimized' | 'pyg')} disabled={isProcessing} className="w-4 h-4 text-primary border-border focus:ring-primary" />

Large chunks with overlap for modern LLMs like Gemma3:27b. Best for efficiency.

setChunkingMethod(e.target.value as 'optimized' | 'pyg')} disabled={isProcessing} className="w-4 h-4 text-primary border-border focus:ring-primary" />

PyG's txt2kg.py chunking algorithm with configurable chunk size and overlap.

setChunkSize(Number(e.target.value))} disabled={isProcessing} className="w-full px-3 py-2 border border-border rounded-md bg-background text-foreground focus:outline-none focus:ring-2 focus:ring-primary focus:border-transparent" />

Larger chunks provide more context but use more GPU memory and may lose detailed information.

setOverlapSize(Number(e.target.value))} disabled={isProcessing} className="w-full px-3 py-2 border border-border rounded-md bg-background text-foreground focus:outline-none focus:ring-2 focus:ring-primary focus:border-transparent" />

Overlap between chunks to preserve context across boundaries. Set to 0 for original PyG behavior.

Current Configuration
{chunkingMethod === 'pyg' ? ( <>
• Method: PyTorch Geometric (enhanced with overlap)
• Estimated chunks for 64KB document: ~{Math.ceil(64000 / Math.max(1, chunkSize - overlapSize))}
• Chunk size: {chunkSize.toLocaleString()} characters
• Overlap: {overlapSize} characters {overlapSize === 0 ? '(original PyG)' : '(enhanced)'}
• Best for: {overlapSize === 0 ? 'PyG compatibility' : 'Enhanced context preservation'}
) : ( <>
• Method: Optimized for modern LLMs
• Estimated chunks for 64KB document: ~{Math.ceil(64000 / chunkSize)}
• GPU memory per chunk: ~{Math.round(chunkSize / 1000)}MB
• Overlap: {overlapSize} characters
• Processing efficiency: {chunkSize >= 32000 ? 'Optimal' : chunkSize >= 16000 ? 'Good' : 'Basic'}
)}
{/* Advanced Options with Prompt Configuration */}
{error && (

{error}

)}
(doc.status === "New" || doc.status === "Processed" || doc.status === "Error")).length && documents.filter(doc => (doc.status === "New" || doc.status === "Processed" || doc.status === "Error")).length > 0} onChange={handleSelectAll} disabled={documents.filter(doc => (doc.status === "New" || doc.status === "Processed" || doc.status === "Error")).length === 0 || isProcessing} /> {selectedDocs.length > 0 ? ( {selectedDocs.length} selected ) : ( "Select all" )}
{isProcessing && ( )}
{documents.length === 0 ? (

No documents available for processing

) : ( {sortedDocuments.map((doc) => ( (doc.status === "New" || doc.status === "Processed" || doc.status === "Error") && !isProcessing && handleItemClick(doc, e)} > ))}
handleSort('name')} >
Document {sortField === 'name' && ( sortDirection === 'asc' ? : )}
handleSort('size')} >
Size {sortField === 'size' && ( sortDirection === 'asc' ? : )}
handleSort('status')} >
Status {sortField === 'status' && ( sortDirection === 'asc' ? : )}
e.stopPropagation()}> { e.stopPropagation(); handleItemClick(doc, e); }} disabled={(doc.status !== "New" && doc.status !== "Processed" && doc.status !== "Error") || isProcessing} />
{doc.name}
{doc.status === "New" && ( {doc.status} )} {doc.status === "Processing" && ( {doc.status} )} {doc.status === "Processed" && ( {doc.status} )} {doc.status === "Error" && ( {doc.status} )} {doc.size} KB
)}
{isProcessing && status && (
{status}
// NOTE(review): end of TriplesContent; InfoIcon follows on the next line — its SVG
// body was stripped (`return ( )`), so it currently renders nothing as written.
)} ) } function InfoIcon(props: React.SVGProps) { return ( ) }