// 2025-12-02 19:43:52 +00:00
//
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// 2025-10-06 17:05:41 +00:00
"use client"
import type React from "react"
import { createContext , useContext , useState , useEffect } from "react"
import { type Triple , processTextWithChunking , processTextWithChunkingPyG , triplesToGraph } from "@/utils/text-processing"
import { useRouter } from "next/navigation"
import { toast } from "@/hooks/use-toast"
import { type PromptConfigurations } from "@/components/prompt-configuration"
// A document tracked by the provider: upload/processing state plus the
// artifacts derived from it (raw text, extracted triples, graph, embeddings).
export type Document = {
  id: string
  name: string
  // Lifecycle of triple extraction for this document.
  status: "New" | "Processing" | "Processed" | "Error"
  uploadStatus: "Uploading" | "Uploaded"
  // Human-readable size in KB (stringified, see addDocuments).
  size: string
  file: File
  // Raw text content; may be omitted for large files when persisted.
  content?: string
  triples?: Triple[]
  // Graph derived from triples via triplesToGraph.
  graph?: {
    nodes: Array<{ id: string; label: string }>
    edges: Array<{ source: string; target: string; label: string }>
  }
  error?: string
  chunkCount?: number
  extractedDate?: Date
  // Which pipeline produced the triples ('fallback' = LangChain failed but
  // default-extraction triples were kept).
  processingMethod?: 'default' | 'langchain' | 'graphtransformer' | 'fallback'
  // Embedding-generation bookkeeping for this document.
  embeddings?: {
    count: number
    generated: Date
    status: "New" | "Processing" | "Processed" | "Error"
    error?: string
  }
}
// Supported LLM backends for triple extraction.
export type LLMProvider = 'nvidia' | 'ollama';

// Options accepted by processDocuments; all fields are optional and
// defaulted inside processDocuments.
export type ProcessingOptions = {
  useLangChain?: boolean;
  useGraphTransformer?: boolean;
  promptConfigs?: PromptConfigurations;
  llmProvider?: LLMProvider;
  ollamaModel?: string;
  ollamaBaseUrl?: string;
  chunkSize?: number;
  overlapSize?: number;
  // 'pyg' mirrors PyTorch Geometric's chunking; 'optimized' uses overlap-aware chunking.
  chunkingMethod?: 'optimized' | 'pyg';
};
type DocumentContextType = {
documents : Document [ ]
addDocuments : ( files : File [ ] ) = > void
deleteDocuments : ( documentIds : string [ ] ) = > void
clearDocuments : ( ) = > void
processDocuments : ( selectedDocIds? : string [ ] , options? : ProcessingOptions ) = > Promise < void >
// Legacy method for backward compatibility
processDocumentsLegacy : ( useLangChain : boolean , selectedDocIds? : string [ ] , useGraphTransformer? : boolean , promptConfigs? : PromptConfigurations ) = > Promise < void >
isProcessing : boolean
updateTriples : ( documentId : string , triples : Triple [ ] ) = > void
addTriple : ( documentId : string , triple : Triple ) = > void
editTriple : ( documentId : string , index : number , triple : Triple ) = > void
deleteTriple : ( documentId : string , index : number ) = > void
openGraphVisualization : ( documentId? : string ) = > Promise < void >
generateEmbeddings : ( documentId : string ) = > Promise < void >
isGeneratingEmbeddings : boolean
viewTriples ? : ( documentId : string ) = > void
}
const DocumentContext = createContext < DocumentContextType | undefined > ( undefined )
// Utility function to generate UUID with fallback
const generateUUID = ( ) : string = > {
// Check if crypto.randomUUID is available
if ( typeof crypto !== 'undefined' && crypto . randomUUID ) {
try {
return crypto . randomUUID ( ) ;
} catch ( error ) {
console . warn ( 'crypto.randomUUID failed, using fallback:' , error ) ;
}
}
// Fallback UUID generation
return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx' . replace ( /[xy]/g , function ( c ) {
const r = Math . random ( ) * 16 | 0 ;
const v = c == 'x' ? r : ( r & 0x3 | 0x8 ) ;
return v . toString ( 16 ) ;
} ) ;
} ;
export function DocumentProvider ( { children } : { children : React.ReactNode } ) {
const router = useRouter ( )
const [ documents , setDocuments ] = useState < Document [ ] > ( [ ] )
const [ isInitialized , setIsInitialized ] = useState ( false )
const [ isProcessing , setIsProcessing ] = useState ( false )
const [ isGeneratingEmbeddings , setIsGeneratingEmbeddings ] = useState ( false )
const [ apiKey , setApiKey ] = useState < string | null > ( null )
// Load API key from localStorage on client-side only
useEffect ( ( ) = > {
if ( typeof window !== 'undefined' ) {
// API key loading removed - xAI integration has been removed
}
} , [ ] ) ;
// Load from localStorage on client-side only
useEffect ( ( ) = > {
if ( ! isInitialized ) {
try {
const savedDocuments = localStorage . getItem ( 'txt2kg_documents' )
if ( savedDocuments ) {
const parsedDocuments = JSON . parse ( savedDocuments )
// Reconstruct documents with placeholder File objects
const reconstructedDocs = parsedDocuments . map ( ( doc : any ) = > {
// Create a blob from the content if available
let file : File ;
if ( doc . content ) {
// Create a File object from the content string we previously saved
const blob = new Blob ( [ doc . content ] , { type : 'text/plain' } ) ;
file = new File ( [ blob ] , doc . name , { type : 'text/plain' } ) ;
} else {
// Create an empty placeholder if no content is available
file = new File ( [ ] , doc . name , { type : 'text/plain' } ) ;
}
return {
. . . doc ,
file
} ;
} ) ;
console . log ( ` Restored ${ reconstructedDocs . length } documents from localStorage ` ) ;
setDocuments ( reconstructedDocs ) ;
}
} catch ( error ) {
console . error ( 'Error loading documents from localStorage:' , error ) ;
}
setIsInitialized ( true ) ;
}
} , [ isInitialized ] ) ;
// Save documents to localStorage whenever they change, but only after initialization
useEffect ( ( ) = > {
if ( isInitialized ) {
try {
if ( documents . length > 0 ) {
// Serialize documents for localStorage storage
// We need to ensure large documents don't exceed localStorage limits
// Focus on saving processed data (triples & graph) rather than raw content for large files
const documentsToSave = documents . map ( doc = > {
// Don't save content for very large documents to avoid localStorage limits
// But keep it for smaller ones to avoid reprocessing
const shouldSaveContent = ! doc . content || doc . content . length < 100000 ;
return {
. . . doc ,
// Omit the actual File object as it can't be serialized
file : {
name : doc.file.name ,
size : doc.file.size ,
type : doc . file . type
} ,
// Only include content for smaller documents
content : shouldSaveContent ? doc.content : undefined
} ;
} ) ;
localStorage . setItem ( 'txt2kg_documents' , JSON . stringify ( documentsToSave ) ) ;
console . log ( ` Saved ${ documents . length } documents to localStorage ` ) ;
} else {
// Clear localStorage if documents array is empty
localStorage . removeItem ( 'txt2kg_documents' ) ;
console . log ( 'Cleared documents from localStorage' ) ;
}
} catch ( error ) {
console . error ( 'Error saving documents to localStorage:' , error ) ;
}
}
} , [ documents , isInitialized ] )
const addDocuments = ( files : File [ ] ) = > {
const newDocuments = files . map ( ( file ) = > ( {
id : generateUUID ( ) ,
name : file.name ,
status : "New" as const ,
uploadStatus : "Uploaded" as const ,
size : ( file . size / 1024 ) . toFixed ( 2 ) , // Convert to KB
file ,
} ) )
setDocuments ( ( prev ) = > [ . . . prev , . . . newDocuments ] )
}
const deleteDocuments = ( documentIds : string [ ] ) = > {
setDocuments ( ( prev ) = > prev . filter ( ( doc ) = > ! documentIds . includes ( doc . id ) ) )
}
const clearDocuments = ( ) = > {
setDocuments ( [ ] )
}
const updateDocumentStatus = ( id : string , status : Document [ "status" ] , updates : Partial < Document > = { } ) = > {
console . log ( ` Updating document ${ id } status to: ${ status } ` ) ;
setDocuments ( ( prev ) = > {
const updated = prev . map ( ( doc ) = > ( doc . id === id ? { . . . doc , status , . . . updates } : doc ) ) ;
// Force UI refresh by adding timestamp to document state
// This ensures React detects the change and re-renders components
const timestamped = updated . map ( doc = > ( {
. . . doc ,
_lastUpdated : Date.now ( ) // Adding timestamp helps React detect changes
} ) ) ;
return timestamped ;
} ) ;
// Trigger a custom event for components that need to refresh
if ( typeof window !== 'undefined' ) {
console . log ( 'Dispatching document-status-changed event' ) ;
window . dispatchEvent ( new CustomEvent ( 'document-status-changed' , {
detail : { documentId : id , status }
} ) ) ;
}
}
const updateTriples = ( documentId : string , triples : Triple [ ] ) = > {
// Helper function to normalize text
const normalizeText = ( text : string ) : string = > {
return text . replace ( /['"()]/g , '' ) . trim ( ) ;
} ;
// Normalize triples before saving
const normalizedTriples = triples . map ( triple = > ( {
subject : normalizeText ( triple . subject ) ,
predicate : normalizeText ( triple . predicate ) ,
object : normalizeText ( triple . object )
} ) ) ;
setDocuments ( ( prev ) = >
prev . map ( ( doc ) = > {
if ( doc . id === documentId ) {
const graph = triplesToGraph ( normalizedTriples )
return { . . . doc , triples : normalizedTriples , graph }
}
return doc
} ) ,
)
}
const addTriple = ( documentId : string , triple : Triple ) = > {
// Helper function to normalize text with null/undefined checks
const normalizeText = ( text : string | null | undefined ) : string = > {
if ( ! text || typeof text !== 'string' ) return '' ;
return text . replace ( /['"()]/g , '' ) . trim ( ) ;
} ;
// Normalize the new triple
const normalizedTriple = {
subject : normalizeText ( triple . subject ) ,
predicate : normalizeText ( triple . predicate ) ,
object : normalizeText ( triple . object )
} ;
setDocuments ( ( prev ) = >
prev . map ( ( doc ) = > {
if ( doc . id === documentId && doc . triples ) {
const newTriples = [ . . . doc . triples , normalizedTriple ]
const graph = triplesToGraph ( newTriples )
return { . . . doc , triples : newTriples , graph }
}
return doc
} ) ,
)
}
const editTriple = ( documentId : string , index : number , triple : Triple ) = > {
// Helper function to normalize text with null/undefined checks
const normalizeText = ( text : string | null | undefined ) : string = > {
if ( ! text || typeof text !== 'string' ) return '' ;
return text . replace ( /['"()]/g , '' ) . trim ( ) ;
} ;
// Normalize the edited triple
const normalizedTriple = {
subject : normalizeText ( triple . subject ) ,
predicate : normalizeText ( triple . predicate ) ,
object : normalizeText ( triple . object )
} ;
setDocuments ( ( prev ) = >
prev . map ( ( doc ) = > {
if ( doc . id === documentId && doc . triples ) {
const newTriples = [ . . . doc . triples ]
newTriples [ index ] = normalizedTriple
const graph = triplesToGraph ( newTriples )
return { . . . doc , triples : newTriples , graph }
}
return doc
} ) ,
)
}
const deleteTriple = ( documentId : string , index : number ) = > {
setDocuments ( ( prev ) = >
prev . map ( ( doc ) = > {
if ( doc . id === documentId && doc . triples ) {
const newTriples = doc . triples . filter ( ( _ , i ) = > i !== index )
const graph = triplesToGraph ( newTriples )
return { . . . doc , triples : newTriples , graph }
}
return doc
} ) ,
)
}
const readFileContent = ( file : File ) : Promise < string > = > {
return new Promise ( ( resolve , reject ) = > {
// Check if it's a valid file with size
if ( file . size === 0 ) {
// Handle zero-byte files
console . warn ( ` File ${ file . name } is empty (0 bytes) ` ) ;
reject ( new Error ( 'File is empty (0 bytes)' ) ) ;
return ;
}
// If the file isn't a real file (like from localStorage), handle that case
if ( ! ( file instanceof Blob ) || ( file . size === 0 && file . type === '' ) ) {
console . warn ( ` File ${ file . name } appears to be a placeholder or invalid ` ) ;
reject ( new Error ( 'Invalid file reference - likely a placeholder' ) ) ;
return ;
}
const reader = new FileReader ( ) ;
reader . onload = ( e ) = > {
const content = e . target ? . result as string ;
if ( ! content || content . trim ( ) === '' ) {
console . warn ( ` File ${ file . name } content is empty or whitespace only ` ) ;
reject ( new Error ( 'File content is empty' ) ) ;
return ;
}
resolve ( content ) ;
} ;
reader . onerror = ( e ) = > {
console . error ( ` Error reading file ${ file . name } : ` , e ) ;
reject ( e ) ;
} ;
reader . readAsText ( file ) ;
} ) ;
}
const extractTriplesFromChunk = async ( chunk : string , systemPrompt? : string ) : Promise < Triple [ ] > = > {
console . log ( ` Extracting triples from chunk of length: ${ chunk . length } ` )
// Create headers with API key if available
const headers : Record < string , string > = {
"Content-Type" : "application/json" ,
}
// Add API key to headers if available
if ( apiKey ) {
headers [ "X-API-Key" ] = apiKey
}
// Prepare request body with optional custom system prompt
const requestBody : any = { text : chunk } ;
if ( systemPrompt ) {
requestBody . systemPrompt = systemPrompt ;
}
// Add LLM provider information based on selected model
const selectedModel = localStorage . getItem ( "selectedModel" ) ;
if ( selectedModel ) {
try {
const model = JSON . parse ( selectedModel ) ;
if ( model . provider === "ollama" ) {
requestBody . llmProvider = "ollama" ;
requestBody . ollamaModel = model . model || "llama3.1:8b" ;
console . log ( ` 🦙 Using Ollama model: ${ requestBody . ollamaModel } ` ) ;
} else if ( model . id === "nvidia-nemotron" || model . id === "nvidia-nemotron-nano" ) {
requestBody . llmProvider = "nvidia" ;
2025-12-02 19:43:52 +00:00
requestBody . nvidiaModel = model . model ; // Pass the actual model name
console . log ( ` 🖥️ Using NVIDIA model: ${ model . model } ` ) ;
2025-10-06 17:05:41 +00:00
}
} catch ( e ) {
// Ignore parsing errors, will use default
console . log ( ` ⚠️ Error parsing selected model, using default ` ) ;
}
} else {
console . log ( ` ⚠️ No selected model found, using default ` ) ;
}
const response = await fetch ( "/api/extract-triples" , {
method : "POST" ,
headers ,
body : JSON.stringify ( requestBody ) ,
// Rely on server-side timeout configuration instead of client-side AbortSignal
} )
console . log ( "API response status:" , response . status )
const data = await response . json ( )
if ( ! response . ok ) {
console . error ( "API error:" , data )
throw new Error ( data . error || "Failed to extract triples" )
}
console . log ( "API response data:" , data )
console . log ( "Triples count:" , data . triples ? . length || 0 )
return data . triples || [ ]
}
// New processDocuments method with better options structure
const processDocuments = async (
selectedDocIds? : string [ ] ,
options? : ProcessingOptions
) = > {
console . log ( '🔍 processDocuments called with:' , {
selectedDocIds ,
selectedCount : selectedDocIds?.length || 0 ,
options
} ) ;
const {
useLangChain = false ,
useGraphTransformer = false ,
promptConfigs ,
llmProvider = 'ollama' ,
ollamaModel = 'qwen3:1.7b' ,
ollamaBaseUrl = 'http://localhost:11434/v1' ,
chunkSize = 64000 ,
overlapSize = 2000 ,
chunkingMethod = 'optimized'
} = options || { } ;
return processDocumentsImpl ( useLangChain , selectedDocIds , useGraphTransformer , promptConfigs , {
llmProvider ,
ollamaModel ,
ollamaBaseUrl ,
chunkSize ,
overlapSize ,
chunkingMethod
} ) ;
} ;
// Legacy method for backward compatibility
const processDocumentsLegacy = async (
useLangChain : boolean ,
selectedDocIds? : string [ ] ,
useGraphTransformer? : boolean ,
promptConfigs? : PromptConfigurations
) = > {
return processDocumentsImpl ( useLangChain , selectedDocIds , useGraphTransformer , promptConfigs ) ;
} ;
const processDocumentsImpl = async (
useLangChain : boolean ,
selectedDocIds? : string [ ] ,
useGraphTransformer? : boolean ,
promptConfigs? : PromptConfigurations ,
llmOptions ? : {
llmProvider? : LLMProvider ;
ollamaModel? : string ;
ollamaBaseUrl? : string ;
chunkSize? : number ;
overlapSize? : number ;
chunkingMethod ? : 'optimized' | 'pyg' ;
}
) = > {
console . log ( '🔍 processDocumentsImpl called with:' , {
useLangChain ,
selectedDocIds ,
selectedCount : selectedDocIds?.length || 0 ,
useGraphTransformer ,
totalDocuments : documents.length
} ) ;
// If selectedDocIds is explicitly provided, use it
// If not provided, don't process anything (instead of processing all docs)
const docIdsToProcess = selectedDocIds || [ ] ;
console . log ( '🔍 Document IDs to process:' , docIdsToProcess ) ;
// Get selected documents - filter by the provided selectedDocIds array
const docsToProcess = documents . filter (
( doc ) = > docIdsToProcess . includes ( doc . id ) &&
( doc . status === "New" || doc . status === "Processed" || doc . status === "Error" )
) ;
console . log ( '🔍 Documents to process:' , docsToProcess . map ( d = > ( { id : d.id , name : d.name , status : d.status } ) ) ) ;
if ( docsToProcess . length === 0 ) {
console . log ( "❌ No documents to process - either none selected or none have valid status" ) ;
return ;
}
setIsProcessing ( true ) ;
try {
2025-12-02 19:43:52 +00:00
// Check which documents are already processed in ArangoDB
console . log ( '🔍 Checking which documents are already processed in ArangoDB...' ) ;
let alreadyProcessedDocs : Set < string > = new Set ( ) ;
try {
const response = await fetch ( '/api/graph-db/check-document' , {
method : 'POST' ,
headers : { 'Content-Type' : 'application/json' } ,
body : JSON.stringify ( {
documentNames : docsToProcess.map ( d = > d . name )
} )
} ) ;
if ( response . ok ) {
const result = await response . json ( ) ;
if ( result . processedDocuments ) {
Object . entries ( result . processedDocuments ) . forEach ( ( [ docName , isProcessed ] ) = > {
if ( isProcessed ) {
alreadyProcessedDocs . add ( docName ) ;
}
} ) ;
console . log ( ` ✅ Found ${ alreadyProcessedDocs . size } documents already processed in ArangoDB: ` , Array . from ( alreadyProcessedDocs ) ) ;
}
}
} catch ( checkError ) {
console . warn ( '⚠️ Could not check for already processed documents, continuing anyway:' , checkError ) ;
}
2025-10-06 17:05:41 +00:00
// Process each document sequentially
for ( const doc of docsToProcess ) {
2025-12-02 19:43:52 +00:00
// Skip if document is already processed in ArangoDB
if ( alreadyProcessedDocs . has ( doc . name ) ) {
console . log ( ` ⏭️ Skipping document " ${ doc . name } " - already processed in ArangoDB ` ) ;
updateDocumentStatus ( doc . id , "Processed" , {
triples : doc.triples || [ ] ,
graph : doc.graph ,
error : undefined
} ) ;
toast ( {
title : "Document Skipped" ,
description : ` " ${ doc . name } " is already stored in ArangoDB ` ,
duration : 3000 ,
} ) ;
continue ;
}
2025-10-06 17:05:41 +00:00
// Update status to Processing before we begin
updateDocumentStatus ( doc . id , "Processing" ) ;
try {
// Read file content if not already available
let content = doc . content ;
if ( ! content ) {
content = await readFileContent ( doc . file ) ;
}
console . log ( ` 🚀 Processing document ${ doc . name } , useLangChain: ${ useLangChain } , isCSV: ${ doc . name . toLowerCase ( ) . endsWith ( '.csv' ) } ` ) ;
// Handle CSV files specially - always use row-as-document processing regardless of LangChain setting
if ( doc . name . toLowerCase ( ) . endsWith ( '.csv' ) ) {
console . log ( '📊 Processing CSV file with row-as-document approach:' , doc . name ) ;
try {
const triples = await parseCSVContent ( content ) ;
console . log ( ` ✅ CSV processing complete: ${ triples . length } triples extracted ` ) ;
// Send to process-document API
const response = await fetch ( '/api/process-document' , {
method : 'POST' ,
headers : { 'Content-Type' : 'application/json' } ,
body : JSON.stringify ( {
text : content ,
filename : doc.name ,
triples : triples ,
useLangChain : useLangChain , // Pass through the original setting
useGraphTransformer : useGraphTransformer ,
systemPrompt : promptConfigs?.systemPrompt ,
extractionPrompt : promptConfigs?.extractionPrompt ,
graphTransformerPrompt : promptConfigs?.graphTransformerPrompt
} )
} ) ;
if ( ! response . ok ) {
throw new Error ( ` Document processing failed: ${ response . statusText } ` ) ;
}
const result = await response . json ( ) ;
// Update the document with triples and graph
updateDocumentStatus ( doc . id , "Processed" , {
triples : triples ,
graph : triplesToGraph ( triples ) ,
metadata : {
totalTriples : triples.length ,
processingMethod : 'csv_row_as_document' ,
langchainUsed : useLangChain ,
graphTransformerUsed : useGraphTransformer
}
} ) ;
console . log ( ` ✅ Document ${ doc . name } processed successfully with ${ triples . length } triples ` ) ;
} catch ( error ) {
console . error ( ` ❌ Error processing CSV file ${ doc . name } : ` , error ) ;
updateDocumentStatus ( doc . id , "Error" , undefined , error instanceof Error ? error . message : 'Unknown error' ) ;
}
continue ; // Skip the rest of the processing for CSV files
}
if ( useLangChain ) {
// Use process-document endpoint with useLangChain flag
console . log ( ` Processing document ${ doc . name } with LangChain via process-document API... ` ) ;
// Extract triples using the default method first (for fallback)
let triples : Triple [ ] = [ ] ;
try {
// Convert JSON to text if it's a JSON file
let processedContent = content ;
if ( doc . name . toLowerCase ( ) . endsWith ( '.json' ) ) {
processedContent = convertJsonToText ( content ) ;
}
// Pass the custom system prompt if available
const systemPrompt = promptConfigs ? . systemPrompt ;
triples = await processTextWithChunking (
processedContent ,
( chunk ) = > extractTriplesFromChunk ( chunk , systemPrompt )
) ;
// Call the process-document API endpoint with useLangChain flag
// NOTE: This no longer automatically stores triples in Neo4j.
// Storage in Neo4j is now handled manually through the UI's "Store in Graph DB" button.
console . log ( ` Sending ${ triples . length } triples to process-document API with useLangChain=true ${ useGraphTransformer ? 'using GraphTransformer' : '' } ` ) ;
// Include prompt configurations in the request body
const requestBody : any = {
text : doc.name.toLowerCase ( ) . endsWith ( '.json' ) ? convertJsonToText ( content ) : content ,
filename : doc.name ,
triples : triples ,
useLangChain : true ,
useGraphTransformer : useGraphTransformer
} ;
// Add LLM provider options if available
if ( llmOptions ) {
if ( llmOptions . llmProvider ) {
requestBody . llmProvider = llmOptions . llmProvider ;
}
if ( llmOptions . ollamaModel ) {
requestBody . ollamaModel = llmOptions . ollamaModel ;
}
if ( llmOptions . ollamaBaseUrl ) {
requestBody . ollamaBaseUrl = llmOptions . ollamaBaseUrl ;
}
}
// Add prompt configs if available
if ( promptConfigs ) {
if ( useGraphTransformer && promptConfigs . graphTransformerPrompt ) {
requestBody . graphTransformerPrompt = promptConfigs . graphTransformerPrompt ;
} else if ( promptConfigs . defaultExtractionPrompt ) {
requestBody . extractionPrompt = promptConfigs . defaultExtractionPrompt ;
}
}
const response = await fetch ( '/api/process-document' , {
method : 'POST' ,
headers : { 'Content-Type' : 'application/json' } ,
body : JSON.stringify ( requestBody )
} ) ;
if ( ! response . ok ) {
const errorText = await response . text ( ) ;
console . error ( ` Document processing API error: ${ response . status } ${ response . statusText } ` , errorText ) ;
throw new Error ( ` Document processing failed: ${ response . statusText } - ${ errorText } ` ) ;
}
const result = await response . json ( ) ;
console . log ( ` Received response from process-document API with ${ result . triples ? . length || 0 } triples ` ) ;
// Update the document with triples and graph
const resultTriples = result . triples || triples ; // Fall back to original triples if none returned
console . log ( ` Updating document status to "Processed" with ${ resultTriples . length } triples ` ) ;
updateDocumentStatus ( doc . id , "Processed" , {
content ,
triples : resultTriples ,
graph : triplesToGraph ( resultTriples ) ,
extractedDate : new Date ( ) ,
processingMethod : useGraphTransformer ? 'graphtransformer' : 'langchain'
} ) ;
} catch ( processingError ) {
console . error ( ` Error in LangChain processing for ${ doc . name } : ` , processingError ) ;
// If we have fallback triples, still mark as processed but include the error
if ( triples . length > 0 ) {
console . log ( ` Using ${ triples . length } fallback triples despite processing error ` ) ;
updateDocumentStatus ( doc . id , "Processed" , {
content ,
triples ,
graph : triplesToGraph ( triples ) ,
extractedDate : new Date ( ) ,
error : processingError instanceof Error ? processingError . message : "Unknown error during LangChain processing" ,
processingMethod : 'fallback'
} ) ;
} else {
// If no fallback triples, mark as error
throw processingError ;
}
}
} else {
// Use default processing (original implementation)
console . log ( ` Processing document ${ doc . name } using default processor... ` ) ;
// Note: CSV files are handled above, so this only processes non-CSV files
{
// For non-CSV files, use the text chunking approach
console . log ( ` Processing text document with chunking: ${ doc . name } ` ) ;
// Convert JSON to text if it's a JSON file
let processedContent = content ;
if ( doc . name . toLowerCase ( ) . endsWith ( '.json' ) ) {
processedContent = convertJsonToText ( content ) ;
console . log ( ` Converted JSON file ${ doc . name } to text format for processing ` ) ;
}
// Use custom system prompt if available
const systemPrompt = promptConfigs ? . systemPrompt ;
const chunkSize = llmOptions ? . chunkSize || 512 ;
const overlapSize = llmOptions ? . overlapSize || 0 ;
const chunkingMethod = llmOptions ? . chunkingMethod || 'pyg' ;
let triples : Triple [ ] ;
if ( chunkingMethod === 'pyg' ) {
// Use PyTorch Geometric's exact chunking method with configurable chunk size and overlap
const pygChunkSize = chunkSize || 512 ; // Use configured chunk size or default to 512
const pygOverlapSize = overlapSize || 0 ; // Use configured overlap or default to 0 (original PyG behavior)
triples = await processTextWithChunkingPyG (
processedContent ,
( chunk ) = > extractTriplesFromChunk ( chunk , systemPrompt ) ,
pygChunkSize ,
pygOverlapSize
) ;
} else {
// Use optimized chunking with overlap
triples = await processTextWithChunking (
processedContent ,
( chunk ) = > extractTriplesFromChunk ( chunk , systemPrompt ) ,
chunkSize ,
overlapSize
) ;
}
// Send to process-document API - no longer automatically stores in Neo4j
// Storage in Neo4j is now handled manually through the UI's "Store in Graph DB" button
const requestBody : any = {
text : processedContent ,
filename : doc.name ,
triples : triples ,
useLangChain : false
} ;
// Add system prompt if available
if ( promptConfigs ? . systemPrompt ) {
requestBody . systemPrompt = promptConfigs . systemPrompt ;
}
const response = await fetch ( '/api/process-document' , {
method : 'POST' ,
headers : { 'Content-Type' : 'application/json' } ,
body : JSON.stringify ( requestBody )
} ) ;
if ( ! response . ok ) {
throw new Error ( ` Document processing failed: ${ response . statusText } ` ) ;
}
// Update the document with triples and graph
updateDocumentStatus ( doc . id , "Processed" , {
content ,
triples ,
graph : triplesToGraph ( triples ) ,
chunkCount : Math.ceil ( content . length / 512 ) , // Approximate chunk count
extractedDate : new Date ( )
} ) ;
}
}
} catch ( error ) {
console . error ( ` Error processing document ${ doc . name } : ` , error ) ;
updateDocumentStatus ( doc . id , "Error" , {
error : error instanceof Error ? error . message : "Unknown error"
} ) ;
}
}
} finally {
// Add a small delay before turning off the processing state
// This gives time for all UI updates to complete
console . log ( "Processing complete, finalizing UI updates..." ) ;
// Force a final UI refresh by dispatching an event immediately
if ( typeof window !== 'undefined' ) {
console . log ( "Dispatching processing-complete event" ) ;
window . dispatchEvent ( new CustomEvent ( 'processing-complete' ) ) ;
}
// Reset the processing state
setIsProcessing ( false ) ;
console . log ( "Processing state reset, UI should be updated" ) ;
}
}
// Helper function to process CSV content - each row as a document for LLM extraction
const parseCSVContent = async ( csvContent : string ) : Promise < Triple [ ] > = > {
console . log ( '🔍 parseCSVContent called with content length:' , csvContent . length ) ;
console . log ( 'Processing CSV content with row-as-document approach' ) ;
// Split the CSV content into lines
const lines = csvContent . split ( '\n' ) . filter ( line = > line . trim ( ) . length > 0 ) ;
if ( lines . length < 2 ) {
throw new Error ( "CSV file must contain a header row and at least one data row" ) ;
}
// Parse the header row
const header = lines [ 0 ] . split ( ',' ) . map ( h = > h . trim ( ) . replace ( /^"(.*)"$/ , '$1' ) ) ;
console . log ( ` CSV headers: ${ header . join ( ', ' ) } ` ) ;
// Get data rows (skip header)
const dataRows = lines . slice ( 1 ) ;
console . log ( ` Processing ${ dataRows . length } data rows as individual documents ` ) ;
let allTriples : Triple [ ] = [ ] ;
const BATCH_SIZE = 50 ; // Store every 50 rows
let currentBatch : Triple [ ] = [ ] ;
let storedTriples = 0 ;
// Process each row as a separate document
for ( let rowIdx = 0 ; rowIdx < dataRows . length ; rowIdx ++ ) {
const line = dataRows [ rowIdx ] ;
try {
// Parse CSV row into fields
const fields : string [ ] = [ ] ;
let fieldStart = 0 ;
let inQuotes = false ;
for ( let i = 0 ; i < line . length ; i ++ ) {
if ( line [ i ] === '"' ) {
inQuotes = ! inQuotes ;
} else if ( line [ i ] === ',' && ! inQuotes ) {
fields . push ( line . substring ( fieldStart , i ) . trim ( ) . replace ( /^"(.*)"$/ , '$1' ) ) ;
fieldStart = i + 1 ;
}
}
// Add the last field
fields . push ( line . substring ( fieldStart ) . trim ( ) . replace ( /^"(.*)"$/ , '$1' ) ) ;
// Create document text from the row data
let documentText = '' ;
for ( let i = 0 ; i < Math . min ( header . length , fields . length ) ; i ++ ) {
if ( fields [ i ] && fields [ i ] . trim ( ) ) {
documentText += ` ${ header [ i ] } : ${ fields [ i ] } \ n ` ;
}
}
// Skip empty rows
if ( ! documentText . trim ( ) ) {
console . warn ( ` Skipping empty CSV row ${ rowIdx + 1 } ` ) ;
continue ;
}
console . log ( ` Processing row ${ rowIdx + 1 } as document: ${ documentText . substring ( 0 , 100 ) } ... ` ) ;
// Extract triples from this row's text using the existing extraction function
try {
console . log ( ` 🔄 Calling extractTriplesFromChunk for row ${ rowIdx + 1 } ` ) ;
// Note: promptConfigs is not available in this scope, so we'll pass undefined for now
const rowTriples = await extractTriplesFromChunk ( documentText , undefined ) ;
console . log ( ` 📥 extractTriplesFromChunk returned: ` , rowTriples ) ;
if ( rowTriples && Array . isArray ( rowTriples ) ) {
console . log ( ` ✅ Extracted ${ rowTriples . length } triples from row ${ rowIdx + 1 } ` ) ;
allTriples = allTriples . concat ( rowTriples ) ;
currentBatch = currentBatch . concat ( rowTriples ) ;
// Store batch every BATCH_SIZE rows or on last row
if ( currentBatch . length >= BATCH_SIZE || rowIdx === dataRows . length - 1 ) {
try {
console . log ( ` 💾 Storing batch: ${ currentBatch . length } triples (rows ${ storedTriples + 1 } - ${ rowIdx + 1 } ) ` ) ;
// Store batch to database via API
const batchResponse = await fetch ( '/api/graph-db/triples' , {
method : 'POST' ,
headers : { 'Content-Type' : 'application/json' } ,
body : JSON.stringify ( {
triples : currentBatch ,
source : ` CSV batch ${ Math . floor ( storedTriples / BATCH_SIZE ) + 1 } `
} )
} ) ;
if ( batchResponse . ok ) {
storedTriples += currentBatch . length ;
console . log ( ` ✅ Batch stored successfully! Progress: ${ storedTriples } total triples stored ` ) ;
} else {
console . error ( ` ❌ Failed to store batch: ${ batchResponse . statusText } ` ) ;
// Continue processing even if storage fails
}
currentBatch = [ ] ; // Reset batch
} catch ( batchError ) {
console . error ( ` ❌ Error storing batch at row ${ rowIdx + 1 } : ` , batchError ) ;
// Continue processing even if one batch fails
}
}
} else {
console . warn ( ` ⚠️ No valid triples returned for row ${ rowIdx + 1 } ` ) ;
}
} catch ( error ) {
console . error ( ` ❌ Error extracting triples from row ${ rowIdx + 1 } : ` , error ) ;
continue ;
}
} catch ( parseError ) {
console . error ( ` Error parsing CSV row ${ rowIdx + 1 } : ` , parseError ) ;
continue ;
}
}
console . log ( ` 🏁 Successfully extracted ${ allTriples . length } triples from ${ dataRows . length } CSV rows ` ) ;
console . log ( 'Final triples array:' , allTriples ) ;
return allTriples ;
}
// Helper function to convert JSON content to readable text format
const convertJsonToText = ( jsonContent : string ) : string = > {
try {
// Parse the JSON to validate it
const jsonData = JSON . parse ( jsonContent ) ;
// Convert JSON to a readable text format that preserves structure and relationships
const formatJsonObject = ( obj : any , indent : number = 0 ) : string = > {
const spaces = ' ' . repeat ( indent ) ;
if ( obj === null || obj === undefined ) {
return 'null' ;
}
if ( typeof obj === 'string' || typeof obj === 'number' || typeof obj === 'boolean' ) {
return String ( obj ) ;
}
if ( Array . isArray ( obj ) ) {
if ( obj . length === 0 ) return '[]' ;
const items = obj . map ( ( item , index ) = >
` ${ spaces } Item ${ index + 1 } : ${ formatJsonObject ( item , indent + 1 ) } `
) . join ( '\n' ) ;
return ` [ \ n ${ items } \ n ${ spaces } ] ` ;
}
if ( typeof obj === 'object' ) {
const entries = Object . entries ( obj ) ;
if ( entries . length === 0 ) return '{}' ;
const props = entries . map ( ( [ key , value ] ) = >
` ${ spaces } ${ key } : ${ formatJsonObject ( value , indent + 1 ) } `
) . join ( '\n' ) ;
return ` { \ n ${ props } \ n ${ spaces } } ` ;
}
return String ( obj ) ;
} ;
// Create a descriptive text representation
let textContent = ` JSON Document Content: \ n \ n ` ;
textContent += formatJsonObject ( jsonData ) ;
return textContent ;
} catch ( error ) {
console . warn ( 'Failed to parse JSON, treating as plain text:' , error ) ;
// If JSON parsing fails, return the original content as-is
return jsonContent ;
}
}
const openGraphVisualization = async ( documentId? : string ) = > {
// Find the document to visualize
const doc = documentId
? documents . find ( ( d ) = > d . id === documentId && d . status === "Processed" && d . triples && d . triples . length > 0 )
: documents . find ( ( d ) = > d . status === "Processed" && d . triples && d . triples . length > 0 )
if ( ! doc || ! doc . triples ) {
console . warn ( "No suitable document found for graph visualization" )
return
}
try {
// Create a timestamp to ensure we have unique localStorage keys that don't conflict
const timestamp = Date . now ( ) ;
// Always store in localStorage as a backup with a timestamp suffix
try {
// Store with both the old keys (for backward compatibility) and new timestamped keys
localStorage . setItem ( "graphTriples" , JSON . stringify ( doc . triples ) )
localStorage . setItem ( "graphDocumentName" , doc . name )
// Also store with timestamp for uniqueness
localStorage . setItem ( ` graphTriples_ ${ timestamp } ` , JSON . stringify ( doc . triples ) )
localStorage . setItem ( ` graphDocumentName_ ${ timestamp } ` , doc . name )
console . log ( ` Stored ${ doc . triples . length } triples in localStorage for document: ${ doc . name } ` )
} catch ( localStorageError ) {
console . error ( "LocalStorage error:" , localStorageError ) ;
alert ( "Warning: Unable to save graph data to browser storage. The graph may not persist if you navigate away." ) ;
// Continue with API storage even if localStorage fails
}
// Try the API approach
try {
const response = await fetch ( "/api/graph-data" , {
method : "POST" ,
headers : {
"Content-Type" : "application/json" ,
} ,
body : JSON.stringify ( {
triples : doc.triples ,
documentName : doc.name ,
timestamp // Include timestamp for correlation
} ) ,
} )
if ( response . ok ) {
const { graphId } = await response . json ( )
console . log ( ` Successfully stored graph data with ID: ${ graphId } ` )
// Use Next.js router.replace to avoid building up history stack
router . replace ( ` /graph?id= ${ graphId } &ts= ${ timestamp } ` )
} else {
console . warn ( ` API storage failed ( ${ response . status } ): ${ await response . text ( ) } ` )
// If API fails, use localStorage fallback with timestamp parameter
router . replace ( ` /graph?source=local&ts= ${ timestamp } ` )
}
} catch ( apiError ) {
console . error ( "Error with API storage:" , apiError )
// Navigate using localStorage fallback with timestamp
router . replace ( ` /graph?source=local&ts= ${ timestamp } ` )
}
} catch ( error ) {
console . error ( "Error preparing graph data:" , error )
alert ( "Failed to prepare graph data. See console for details." )
}
}
const generateEmbeddings = async ( documentId : string ) = > {
// Add more detailed diagnostics
const doc = documents . find ( d = > d . id === documentId ) ;
if ( ! doc ) {
toast ( {
title : "Document Not Found" ,
description : ` Could not find document with ID: ${ documentId } ` ,
variant : "destructive" ,
duration : 3000 ,
} ) ;
return ;
}
// If content already exists, use it right away
if ( doc . content && doc . content . trim ( ) !== '' ) {
await processEmbeddings ( doc . id , doc . name , doc . content ) ;
return ;
}
// Document exists but content is not loaded - log debug info
console . log ( ` Attempting to load content for document: ${ doc . name } ` ) ;
console . log ( ` File info: size= ${ doc . file . size } , type= ${ doc . file . type } ` ) ;
// Check if the document was loaded from localStorage and might have a corrupted file reference
const isLikelyFromLocalStorage = doc . file . size === 0 || ! ( doc . file instanceof Blob ) ;
if ( isLikelyFromLocalStorage ) {
toast ( {
title : "File Reference Issue" ,
description : "This document was restored from browser storage and cannot access its original file. Please re-upload the file or process it again first." ,
variant : "destructive" ,
duration : 5000 ,
} ) ;
return ;
}
try {
// Document exists but content might not be loaded - try to load it
const content = await readFileContent ( doc . file ) ;
if ( content && content . trim ( ) !== '' ) {
// Update the document with content first
setDocuments ( prevDocs = >
prevDocs . map ( d = > {
if ( d . id === documentId ) {
return {
. . . d ,
content : content
} ;
}
return d ;
} )
) ;
// Continue with the loaded content
await processEmbeddings ( doc . id , doc . name , content ) ;
} else {
toast ( {
title : "Empty Document" ,
description : "The document file appears to be empty" ,
variant : "destructive" ,
duration : 3000 ,
} ) ;
}
} catch ( error ) {
toast ( {
title : "Content Loading Error" ,
description : ` Failed to load document content: ${ error instanceof Error ? error.message : String ( error ) } ` ,
variant : "destructive" ,
duration : 5000 ,
} ) ;
}
} ;
// Helper function to handle the actual embeddings processing
const processEmbeddings = async ( documentId : string , documentName : string , content : string ) = > {
setIsGeneratingEmbeddings ( true ) ;
try {
console . log ( ` Generating embeddings for document: ${ documentName } ` ) ;
// Update embeddings status to show it's processing, without changing main document status
setDocuments ( prevDocs = >
prevDocs . map ( d = > {
if ( d . id === documentId ) {
return {
. . . d ,
embeddings : {
count : d.embeddings?.count || 0 ,
generated : d.embeddings?.generated || new Date ( ) ,
status : "Processing" as const
}
} ;
}
return d ;
} )
) ;
const response = await fetch ( '/api/embeddings' , {
method : 'POST' ,
headers : {
'Content-Type' : 'application/json'
} ,
body : JSON.stringify ( {
documentId : documentId ,
content : content ,
documentName : documentName
} )
} ) ;
if ( ! response . ok ) {
throw new Error ( ` Failed to generate embeddings: ${ await response . text ( ) } ` ) ;
}
const result = await response . json ( ) ;
console . log ( 'Embeddings generation result:' , result ) ;
// Update embeddings status to show it's processed
setDocuments ( prevDocs = >
prevDocs . map ( d = > {
if ( d . id === documentId ) {
return {
. . . d ,
embeddings : {
count : result.embeddings ,
generated : new Date ( ) ,
status : "Processed" as const
}
} ;
}
return d ;
} )
) ;
// Show a toast notification
toast ( {
title : "Embeddings Generated" ,
description : ` Successfully generated ${ result . embeddings } embeddings for " ${ documentName } " ` ,
duration : 5000 ,
} ) ;
} catch ( error ) {
console . error ( 'Error generating embeddings:' , error ) ;
// Update embeddings status to show there was an error
setDocuments ( prevDocs = >
prevDocs . map ( d = > {
if ( d . id === documentId ) {
return {
. . . d ,
embeddings : {
count : d.embeddings?.count || 0 ,
generated : d.embeddings?.generated || new Date ( ) ,
status : "Error" as const ,
error : error instanceof Error ? error.message : String ( error )
}
} ;
}
return d ;
} )
) ;
toast ( {
title : "Embeddings Generation Failed" ,
description : ` Failed to generate embeddings: ${ error instanceof Error ? error.message : String ( error ) } ` ,
variant : "destructive" ,
duration : 5000 ,
} ) ;
} finally {
setIsGeneratingEmbeddings ( false ) ;
}
} ;
return (
< DocumentContext.Provider
value = { {
documents ,
addDocuments ,
deleteDocuments ,
clearDocuments ,
processDocuments ,
processDocumentsLegacy ,
isProcessing ,
updateTriples ,
addTriple ,
editTriple ,
deleteTriple ,
openGraphVisualization ,
generateEmbeddings ,
isGeneratingEmbeddings
} }
>
{ children }
< / DocumentContext.Provider >
)
}
export function useDocuments() {
const context = useContext ( DocumentContext )
if ( context === undefined ) {
throw new Error ( "useDocuments must be used within a DocumentProvider" )
}
return context
}