dgx-spark-playbooks/nvidia/txt2kg/assets/frontend/contexts/document-context.tsx

//
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
"use client"

import type React from "react"

import { createContext, useContext, useState, useEffect } from "react"
import { type Triple, processTextWithChunking, processTextWithChunkingPyG, triplesToGraph } from "@/utils/text-processing"
import { useRouter } from "next/navigation"
import { toast } from "@/hooks/use-toast"
import { type PromptConfigurations } from "@/components/prompt-configuration"

export type Document = {
  id: string
  name: string
  status: "New" | "Processing" | "Processed" | "Error"
  uploadStatus: "Uploading" | "Uploaded"
  size: string
  file: File
  content?: string
  triples?: Triple[]
  graph?: {
    nodes: Array<{ id: string; label: string }>
    edges: Array<{ source: string; target: string; label: string }>
  }
  error?: string
  chunkCount?: number
  extractedDate?: Date
  processingMethod?: 'default' | 'langchain' | 'graphtransformer' | 'fallback'
  embeddings?: {
    count: number
    generated: Date
    status: "New" | "Processing" | "Processed" | "Error"
    error?: string
  }
}

export type LLMProvider = 'nvidia' | 'ollama';

export type ProcessingOptions = {
  useLangChain?: boolean;
  useGraphTransformer?: boolean;
  promptConfigs?: PromptConfigurations;
  llmProvider?: LLMProvider;
  ollamaModel?: string;
  ollamaBaseUrl?: string;
  chunkSize?: number;
  overlapSize?: number;
  chunkingMethod?: 'optimized' | 'pyg';
};

type DocumentContextType = {
  documents: Document[]
  addDocuments: (files: File[]) => void
  deleteDocuments: (documentIds: string[]) => void
  clearDocuments: () => void
  processDocuments: (selectedDocIds?: string[], options?: ProcessingOptions) => Promise<void>
  // Legacy method for backward compatibility
  processDocumentsLegacy: (useLangChain: boolean, selectedDocIds?: string[], useGraphTransformer?: boolean, promptConfigs?: PromptConfigurations) => Promise<void>
  isProcessing: boolean
  updateTriples: (documentId: string, triples: Triple[]) => void
  addTriple: (documentId: string, triple: Triple) => void
  editTriple: (documentId: string, index: number, triple: Triple) => void
  deleteTriple: (documentId: string, index: number) => void
  openGraphVisualization: (documentId?: string) => Promise<void>
  generateEmbeddings: (documentId: string) => Promise<void>
  isGeneratingEmbeddings: boolean
  viewTriples?: (documentId: string) => void
}

const DocumentContext = createContext<DocumentContextType | undefined>(undefined)

// Utility function to generate UUID with fallback
const generateUUID = (): string => {
  // Check if crypto.randomUUID is available
  if (typeof crypto !== 'undefined' && crypto.randomUUID) {
    try {
      return crypto.randomUUID();
    } catch (error) {
      console.warn('crypto.randomUUID failed, using fallback:', error);
    }
  }

  // Fallback UUID generation
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
    const r = Math.random() * 16 | 0;
    const v = c == 'x' ? r : (r & 0x3 | 0x8);
    return v.toString(16);
  });
};

export function DocumentProvider({ children }: { children: React.ReactNode }) {
  const router = useRouter()
  const [documents, setDocuments] = useState<Document[]>([])
  const [isInitialized, setIsInitialized] = useState(false)
  const [isProcessing, setIsProcessing] = useState(false)
  const [isGeneratingEmbeddings, setIsGeneratingEmbeddings] = useState(false)
  const [apiKey, setApiKey] = useState<string | null>(null)

  // Load API key from localStorage on client-side only
  useEffect(() => {
    if (typeof window !== 'undefined') {
      // API key loading removed - xAI integration has been removed
    }
  }, []);

  // Load from localStorage on client-side only
  useEffect(() => {
    if (!isInitialized) {
      try {
        const savedDocuments = localStorage.getItem('txt2kg_documents')
        if (savedDocuments) {
          const parsedDocuments = JSON.parse(savedDocuments)

          // Reconstruct documents with placeholder File objects
          const reconstructedDocs = parsedDocuments.map((doc: any) => {
            // Create a blob from the content if available
            let file: File;
            if (doc.content) {
              // Create a File object from the content string we previously saved
              const blob = new Blob([doc.content], { type: 'text/plain' });
              file = new File([blob], doc.name, { type: 'text/plain' });
            } else {
              // Create an empty placeholder if no content is available
              file = new File([], doc.name, { type: 'text/plain' });
            }

            return {
              ...doc,
              file
            };
          });

          console.log(`Restored ${reconstructedDocs.length} documents from localStorage`);
          setDocuments(reconstructedDocs);
        }
      } catch (error) {
        console.error('Error loading documents from localStorage:', error);
      }

      setIsInitialized(true);
    }
  }, [isInitialized]);

  // Save documents to localStorage whenever they change, but only after initialization
  useEffect(() => {
    if (isInitialized) {
      try {
        if (documents.length > 0) {
          // Serialize documents for localStorage storage
          // We need to ensure large documents don't exceed localStorage limits
          // Focus on saving processed data (triples & graph) rather than raw content for large files
          const documentsToSave = documents.map(doc => {
            // Don't save content for very large documents to avoid localStorage limits
            // But keep it for smaller ones to avoid reprocessing
            const shouldSaveContent = !doc.content || doc.content.length < 100000;

            return {
              ...doc,
              // Omit the actual File object as it can't be serialized
              file: {
                name: doc.file.name,
                size: doc.file.size,
                type: doc.file.type
              },
              // Only include content for smaller documents
              content: shouldSaveContent ? doc.content : undefined
            };
          });

          localStorage.setItem('txt2kg_documents', JSON.stringify(documentsToSave));
          console.log(`Saved ${documents.length} documents to localStorage`);
        } else {
          // Clear localStorage if documents array is empty
          localStorage.removeItem('txt2kg_documents');
          console.log('Cleared documents from localStorage');
        }
      } catch (error) {
        console.error('Error saving documents to localStorage:', error);
      }
    }
  }, [documents, isInitialized])

  const addDocuments = (files: File[]) => {
    const newDocuments = files.map((file) => ({
      id: generateUUID(),
      name: file.name,
      status: "New" as const,
      uploadStatus: "Uploaded" as const,
      size: (file.size / 1024).toFixed(2), // Convert to KB
      file,
    }))

    setDocuments((prev) => [...prev, ...newDocuments])
  }

  const deleteDocuments = (documentIds: string[]) => {
    setDocuments((prev) => prev.filter((doc) => !documentIds.includes(doc.id)))
  }

  const clearDocuments = () => {
    setDocuments([])
  }

  const updateDocumentStatus = (id: string, status: Document["status"], updates: Partial<Document> = {}) => {
    console.log(`Updating document ${id} status to: ${status}`);
    setDocuments((prev) => {
      const updated = prev.map((doc) => (doc.id === id ? { ...doc, status, ...updates } : doc));

      // Force UI refresh by adding timestamp to document state
      // This ensures React detects the change and re-renders components
      const timestamped = updated.map(doc => ({
        ...doc,
        _lastUpdated: Date.now() // Adding timestamp helps React detect changes
      }));

      return timestamped;
    });

    // Trigger a custom event for components that need to refresh
    if (typeof window !== 'undefined') {
      console.log('Dispatching document-status-changed event');
      window.dispatchEvent(new CustomEvent('document-status-changed', {
        detail: { documentId: id, status }
      }));
    }
  }

  const updateTriples = (documentId: string, triples: Triple[]) => {
    // Helper function to normalize text
    const normalizeText = (text: string): string => {
      return text.replace(/['"()]/g, '').trim();
    };

    // Normalize triples before saving
    const normalizedTriples = triples.map(triple => ({
      subject: normalizeText(triple.subject),
      predicate: normalizeText(triple.predicate),
      object: normalizeText(triple.object)
    }));

    setDocuments((prev) =>
      prev.map((doc) => {
        if (doc.id === documentId) {
          const graph = triplesToGraph(normalizedTriples)
          return { ...doc, triples: normalizedTriples, graph }
        }
        return doc
      }),
    )
  }

  const addTriple = (documentId: string, triple: Triple) => {
    // Helper function to normalize text with null/undefined checks
    const normalizeText = (text: string | null | undefined): string => {
      if (!text || typeof text !== 'string') return '';
      return text.replace(/['"()]/g, '').trim();
    };

    // Normalize the new triple
    const normalizedTriple = {
      subject: normalizeText(triple.subject),
      predicate: normalizeText(triple.predicate),
      object: normalizeText(triple.object)
    };

    setDocuments((prev) =>
      prev.map((doc) => {
        if (doc.id === documentId && doc.triples) {
          const newTriples = [...doc.triples, normalizedTriple]
          const graph = triplesToGraph(newTriples)
          return { ...doc, triples: newTriples, graph }
        }
        return doc
      }),
    )
  }

  const editTriple = (documentId: string, index: number, triple: Triple) => {
    // Helper function to normalize text with null/undefined checks
    const normalizeText = (text: string | null | undefined): string => {
      if (!text || typeof text !== 'string') return '';
      return text.replace(/['"()]/g, '').trim();
    };

    // Normalize the edited triple
    const normalizedTriple = {
      subject: normalizeText(triple.subject),
      predicate: normalizeText(triple.predicate),
      object: normalizeText(triple.object)
    };

    setDocuments((prev) =>
      prev.map((doc) => {
        if (doc.id === documentId && doc.triples) {
          const newTriples = [...doc.triples]
          newTriples[index] = normalizedTriple
          const graph = triplesToGraph(newTriples)
          return { ...doc, triples: newTriples, graph }
        }
        return doc
      }),
    )
  }

  const deleteTriple = (documentId: string, index: number) => {
    setDocuments((prev) =>
      prev.map((doc) => {
        if (doc.id === documentId && doc.triples) {
          const newTriples = doc.triples.filter((_, i) => i !== index)
          const graph = triplesToGraph(newTriples)
          return { ...doc, triples: newTriples, graph }
        }
        return doc
      }),
    )
  }

  const readFileContent = (file: File): Promise<string> => {
    return new Promise((resolve, reject) => {
      // Check if it's a valid file with size
      if (file.size === 0) {
        // Handle zero-byte files
        console.warn(`File ${file.name} is empty (0 bytes)`);
        reject(new Error('File is empty (0 bytes)'));
        return;
      }

      // If the file isn't a real file (like from localStorage), handle that case
      if (!(file instanceof Blob) || (file.size === 0 && file.type === '')) {
        console.warn(`File ${file.name} appears to be a placeholder or invalid`);
        reject(new Error('Invalid file reference - likely a placeholder'));
        return;
      }

      const reader = new FileReader();
      reader.onload = (e) => {
        const content = e.target?.result as string;
        if (!content || content.trim() === '') {
          console.warn(`File ${file.name} content is empty or whitespace only`);
          reject(new Error('File content is empty'));
          return;
        }
        resolve(content);
      };
      reader.onerror = (e) => {
        console.error(`Error reading file ${file.name}:`, e);
        reject(e);
      };
      reader.readAsText(file);
    });
  }

  const extractTriplesFromChunk = async (chunk: string, systemPrompt?: string): Promise<Triple[]> => {
    console.log(`Extracting triples from chunk of length: ${chunk.length}`)

    // Create headers with API key if available
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    }

    // Add API key to headers if available
    if (apiKey) {
      headers["X-API-Key"] = apiKey
    }

    // Prepare request body with optional custom system prompt
    const requestBody: any = { text: chunk };
    if (systemPrompt) {
      requestBody.systemPrompt = systemPrompt;
    }

    // Add LLM provider information based on selected model
    const selectedModel = localStorage.getItem("selectedModel");
    if (selectedModel) {
      try {
        const model = JSON.parse(selectedModel);
        if (model.provider === "ollama") {
          requestBody.llmProvider = "ollama";
          requestBody.ollamaModel = model.model || "llama3.1:8b";
          console.log(`🦙 Using Ollama model: ${requestBody.ollamaModel}`);
        } else if (model.id === "nvidia-nemotron" || model.id === "nvidia-nemotron-nano") {
          requestBody.llmProvider = "nvidia";
          requestBody.nvidiaModel = model.model; // Pass the actual model name
          console.log(`🖥️ Using NVIDIA model: ${model.model}`);
        }
      } catch (e) {
        // Ignore parsing errors, will use default
        console.log(`⚠️ Error parsing selected model, using default`);
      }
    } else {
      console.log(`⚠️ No selected model found, using default`);
    }

    const response = await fetch("/api/extract-triples", {
      method: "POST",
      headers,
      body: JSON.stringify(requestBody),
      // Rely on server-side timeout configuration instead of client-side AbortSignal
    })

    console.log("API response status:", response.status)

    const data = await response.json()

    if (!response.ok) {
      console.error("API error:", data)
      throw new Error(data.error || "Failed to extract triples")
    }

    console.log("API response data:", data)
    console.log("Triples count:", data.triples?.length || 0)

    return data.triples || []
  }

  // New processDocuments method with better options structure
  const processDocuments = async (
    selectedDocIds?: string[],
    options?: ProcessingOptions
  ) => {
    console.log('🔍 processDocuments called with:', {
      selectedDocIds,
      selectedCount: selectedDocIds?.length || 0,
      options
    });

    const {
      useLangChain = false,
      useGraphTransformer = false,
      promptConfigs,
      llmProvider = 'ollama',
      ollamaModel = 'qwen3:1.7b',
      ollamaBaseUrl = 'http://localhost:11434/v1',
      chunkSize = 64000,
      overlapSize = 2000,
      chunkingMethod = 'optimized'
    } = options || {};

    return processDocumentsImpl(useLangChain, selectedDocIds, useGraphTransformer, promptConfigs, {
      llmProvider,
      ollamaModel,
      ollamaBaseUrl,
      chunkSize,
      overlapSize,
      chunkingMethod
    });
  };

  // Legacy method for backward compatibility
  const processDocumentsLegacy = async (
    useLangChain: boolean,
    selectedDocIds?: string[],
    useGraphTransformer?: boolean,
    promptConfigs?: PromptConfigurations
  ) => {
    return processDocumentsImpl(useLangChain, selectedDocIds, useGraphTransformer, promptConfigs);
  };

  const processDocumentsImpl = async (
    useLangChain: boolean,
    selectedDocIds?: string[],
    useGraphTransformer?: boolean,
    promptConfigs?: PromptConfigurations,
    llmOptions?: {
      llmProvider?: LLMProvider;
      ollamaModel?: string;
      ollamaBaseUrl?: string;
      chunkSize?: number;
      overlapSize?: number;
      chunkingMethod?: 'optimized' | 'pyg';
    }
  ) => {
    console.log('🔍 processDocumentsImpl called with:', {
      useLangChain,
      selectedDocIds,
      selectedCount: selectedDocIds?.length || 0,
      useGraphTransformer,
      totalDocuments: documents.length
    });

    // If selectedDocIds is explicitly provided, use it
    // If not provided, don't process anything (instead of processing all docs)
    const docIdsToProcess = selectedDocIds || [];

    console.log('🔍 Document IDs to process:', docIdsToProcess);

    // Get selected documents - filter by the provided selectedDocIds array
    const docsToProcess = documents.filter(
      (doc) => docIdsToProcess.includes(doc.id) &&
      (doc.status === "New" || doc.status === "Processed" || doc.status === "Error")
    );

    console.log('🔍 Documents to process:', docsToProcess.map(d => ({ id: d.id, name: d.name, status: d.status })));

    if (docsToProcess.length === 0) {
      console.log("❌ No documents to process - either none selected or none have valid status");
      return;
    }

    setIsProcessing(true);

    try {
      // Check which documents are already processed in ArangoDB
      console.log('🔍 Checking which documents are already processed in ArangoDB...');
      let alreadyProcessedDocs: Set<string> = new Set();

      try {
        const response = await fetch('/api/graph-db/check-document', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            documentNames: docsToProcess.map(d => d.name)
          })
        });

        if (response.ok) {
          const result = await response.json();
          if (result.processedDocuments) {
            Object.entries(result.processedDocuments).forEach(([docName, isProcessed]) => {
              if (isProcessed) {
                alreadyProcessedDocs.add(docName);
              }
            });
            console.log(`✅ Found ${alreadyProcessedDocs.size} documents already processed in ArangoDB:`, Array.from(alreadyProcessedDocs));
          }
        }
      } catch (checkError) {
        console.warn('⚠️ Could not check for already processed documents, continuing anyway:', checkError);
      }

      // Process each document sequentially
      for (const doc of docsToProcess) {
        // Skip if document is already processed in ArangoDB
        if (alreadyProcessedDocs.has(doc.name)) {
          console.log(`⏭️ Skipping document "${doc.name}" - already processed in ArangoDB`);
          updateDocumentStatus(doc.id, "Processed", {
            triples: doc.triples || [],
            graph: doc.graph,
            error: undefined
          });
          toast({
            title: "Document Skipped",
            description: `"${doc.name}" is already stored in ArangoDB`,
            duration: 3000,
          });
          continue;
        }
        // Update status to Processing before we begin
        updateDocumentStatus(doc.id, "Processing");

        try {
          // Read file content if not already available
          let content = doc.content;
          if (!content) {
            content = await readFileContent(doc.file);
          }

          console.log(`🚀 Processing document ${doc.name}, useLangChain: ${useLangChain}, isCSV: ${doc.name.toLowerCase().endsWith('.csv')}`);

          // Handle CSV files specially - always use row-as-document processing regardless of LangChain setting
          if (doc.name.toLowerCase().endsWith('.csv')) {
            console.log('📊 Processing CSV file with row-as-document approach:', doc.name);

            try {
              const triples = await parseCSVContent(content);
              console.log(`✅ CSV processing complete: ${triples.length} triples extracted`);

              // Send to process-document API
              const response = await fetch('/api/process-document', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({
                  text: content,
                  filename: doc.name,
                  triples: triples,
                  useLangChain: useLangChain, // Pass through the original setting
                  useGraphTransformer: useGraphTransformer,
                  systemPrompt: promptConfigs?.systemPrompt,
                  extractionPrompt: promptConfigs?.extractionPrompt,
                  graphTransformerPrompt: promptConfigs?.graphTransformerPrompt
                })
              });

              if (!response.ok) {
                throw new Error(`Document processing failed: ${response.statusText}`);
              }

              const result = await response.json();

              // Update the document with triples and graph
              updateDocumentStatus(doc.id, "Processed", {
                triples: triples,
                graph: triplesToGraph(triples),
                metadata: {
                  totalTriples: triples.length,
                  processingMethod: 'csv_row_as_document',
                  langchainUsed: useLangChain,
                  graphTransformerUsed: useGraphTransformer
                }
              });

              console.log(`✅ Document ${doc.name} processed successfully with ${triples.length} triples`);
            } catch (error) {
              console.error(`❌ Error processing CSV file ${doc.name}:`, error);
              updateDocumentStatus(doc.id, "Error", undefined, error instanceof Error ? error.message : 'Unknown error');
            }

            continue; // Skip the rest of the processing for CSV files
          }

          if (useLangChain) {
            // Use process-document endpoint with useLangChain flag
            console.log(`Processing document ${doc.name} with LangChain via process-document API...`);

            // Extract triples using the default method first (for fallback)
            let triples: Triple[] = [];
            try {
              // Convert JSON to text if it's a JSON file
              let processedContent = content;
              if (doc.name.toLowerCase().endsWith('.json')) {
                processedContent = convertJsonToText(content);
              }

              // Pass the custom system prompt if available
              const systemPrompt = promptConfigs?.systemPrompt;
              triples = await processTextWithChunking(
                processedContent,
                (chunk) => extractTriplesFromChunk(chunk, systemPrompt)
              );

              // Call the process-document API endpoint with useLangChain flag
              // NOTE: This no longer automatically stores triples in Neo4j.
              // Storage in Neo4j is now handled manually through the UI's "Store in Graph DB" button.
              console.log(`Sending ${triples.length} triples to process-document API with useLangChain=true ${useGraphTransformer ? 'using GraphTransformer' : ''}`);

              // Include prompt configurations in the request body
              const requestBody: any = {
                text: doc.name.toLowerCase().endsWith('.json') ? convertJsonToText(content) : content,
                filename: doc.name,
                triples: triples,
                useLangChain: true,
                useGraphTransformer: useGraphTransformer
              };

              // Add LLM provider options if available
              if (llmOptions) {
                if (llmOptions.llmProvider) {
                  requestBody.llmProvider = llmOptions.llmProvider;
                }
                if (llmOptions.ollamaModel) {
                  requestBody.ollamaModel = llmOptions.ollamaModel;
                }
                if (llmOptions.ollamaBaseUrl) {
                  requestBody.ollamaBaseUrl = llmOptions.ollamaBaseUrl;
                }
              }

              // Add prompt configs if available
              if (promptConfigs) {
                if (useGraphTransformer && promptConfigs.graphTransformerPrompt) {
                  requestBody.graphTransformerPrompt = promptConfigs.graphTransformerPrompt;
                } else if (promptConfigs.defaultExtractionPrompt) {
                  requestBody.extractionPrompt = promptConfigs.defaultExtractionPrompt;
                }
              }

              const response = await fetch('/api/process-document', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(requestBody)
              });

              if (!response.ok) {
                const errorText = await response.text();
                console.error(`Document processing API error: ${response.status} ${response.statusText}`, errorText);
                throw new Error(`Document processing failed: ${response.statusText} - ${errorText}`);
              }

              const result = await response.json();
              console.log(`Received response from process-document API with ${result.triples?.length || 0} triples`);

              // Update the document with triples and graph
              const resultTriples = result.triples || triples; // Fall back to original triples if none returned
              console.log(`Updating document status to "Processed" with ${resultTriples.length} triples`);
              updateDocumentStatus(doc.id, "Processed", {
                content,
                triples: resultTriples,
                graph: triplesToGraph(resultTriples),
                extractedDate: new Date(),
                processingMethod: useGraphTransformer ? 'graphtransformer' : 'langchain'
              });
            } catch (processingError) {
              console.error(`Error in LangChain processing for ${doc.name}:`, processingError);

              // If we have fallback triples, still mark as processed but include the error
              if (triples.length > 0) {
                console.log(`Using ${triples.length} fallback triples despite processing error`);
                updateDocumentStatus(doc.id, "Processed", {
                  content,
                  triples,
                  graph: triplesToGraph(triples),
                  extractedDate: new Date(),
                  error: processingError instanceof Error ? processingError.message : "Unknown error during LangChain processing",
                  processingMethod: 'fallback'
                });
              } else {
                // If no fallback triples, mark as error
                throw processingError;
              }
            }
          } else {
            // Use default processing (original implementation)
            console.log(`Processing document ${doc.name} using default processor...`);

            // Note: CSV files are handled above, so this only processes non-CSV files
            {
              // For non-CSV files, use the text chunking approach
              console.log(`Processing text document with chunking: ${doc.name}`);

              // Convert JSON to text if it's a JSON file
              let processedContent = content;
              if (doc.name.toLowerCase().endsWith('.json')) {
                processedContent = convertJsonToText(content);
                console.log(`Converted JSON file ${doc.name} to text format for processing`);
              }

              // Use custom system prompt if available
              const systemPrompt = promptConfigs?.systemPrompt;
              const chunkSize = llmOptions?.chunkSize || 512;
              const overlapSize = llmOptions?.overlapSize || 0;
              const chunkingMethod = llmOptions?.chunkingMethod || 'pyg';

              let triples: Triple[];
              if (chunkingMethod === 'pyg') {
                // Use PyTorch Geometric's exact chunking method with configurable chunk size and overlap
                const pygChunkSize = chunkSize || 512; // Use configured chunk size or default to 512
                const pygOverlapSize = overlapSize || 0; // Use configured overlap or default to 0 (original PyG behavior)
                triples = await processTextWithChunkingPyG(
                  processedContent,
                  (chunk) => extractTriplesFromChunk(chunk, systemPrompt),
                  pygChunkSize,
                  pygOverlapSize
                );
              } else {
                // Use optimized chunking with overlap
                triples = await processTextWithChunking(
                  processedContent,
                  (chunk) => extractTriplesFromChunk(chunk, systemPrompt),
                  chunkSize,
                  overlapSize
                );
              }

              // Send to process-document API - no longer automatically stores in Neo4j
              // Storage in Neo4j is now handled manually through the UI's "Store in Graph DB" button
              const requestBody: any = {
                text: processedContent,
                filename: doc.name,
                triples: triples,
                useLangChain: false
              };

              // Add system prompt if available
              if (promptConfigs?.systemPrompt) {
                requestBody.systemPrompt = promptConfigs.systemPrompt;
              }

              const response = await fetch('/api/process-document', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(requestBody)
              });

              if (!response.ok) {
                throw new Error(`Document processing failed: ${response.statusText}`);
              }

              // Update the document with triples and graph
              updateDocumentStatus(doc.id, "Processed", {
                content,
                triples,
                graph: triplesToGraph(triples),
                chunkCount: Math.ceil(content.length / 512), // Approximate chunk count
                extractedDate: new Date()
              });
            }
          }
        } catch (error) {
          console.error(`Error processing document ${doc.name}:`, error);
          updateDocumentStatus(doc.id, "Error", {
            error: error instanceof Error ? error.message : "Unknown error"
          });
        }
      }
    } finally {
      // Add a small delay before turning off the processing state
      // This gives time for all UI updates to complete
      console.log("Processing complete, finalizing UI updates...");

      // Force a final UI refresh by dispatching an event immediately
      if (typeof window !== 'undefined') {
        console.log("Dispatching processing-complete event");
        window.dispatchEvent(new CustomEvent('processing-complete'));
      }

      // Reset the processing state
      setIsProcessing(false);
      console.log("Processing state reset, UI should be updated");
    }
  }

  // Helper function to process CSV content - each row as a document for LLM extraction
  const parseCSVContent = async (csvContent: string): Promise<Triple[]> => {
    console.log('🔍 parseCSVContent called with content length:', csvContent.length);
    console.log('Processing CSV content with row-as-document approach');

    // Split the CSV content into lines
    const lines = csvContent.split('\n').filter(line => line.trim().length > 0);

    if (lines.length < 2) {
      throw new Error("CSV file must contain a header row and at least one data row");
    }

    // Parse the header row
    const header = lines[0].split(',').map(h => h.trim().replace(/^"(.*)"$/, '$1'));
    console.log(`CSV headers: ${header.join(', ')}`);

    // Get data rows (skip header)
    const dataRows = lines.slice(1);
    console.log(`Processing ${dataRows.length} data rows as individual documents`);

    let allTriples: Triple[] = [];
    const BATCH_SIZE = 50; // Store every 50 rows
    let currentBatch: Triple[] = [];
    let storedTriples = 0;

    // Process each row as a separate document
    for (let rowIdx = 0; rowIdx < dataRows.length; rowIdx++) {
      const line = dataRows[rowIdx];

      try {
        // Parse CSV row into fields
        const fields: string[] = [];
        let fieldStart = 0;
        let inQuotes = false;

        for (let i = 0; i < line.length; i++) {
          if (line[i] === '"') {
            inQuotes = !inQuotes;
          } else if (line[i] === ',' && !inQuotes) {
            fields.push(line.substring(fieldStart, i).trim().replace(/^"(.*)"$/, '$1'));
            fieldStart = i + 1;
          }
        }

        // Add the last field
        fields.push(line.substring(fieldStart).trim().replace(/^"(.*)"$/, '$1'));

        // Create document text from the row data
        let documentText = '';
        for (let i = 0; i < Math.min(header.length, fields.length); i++) {
          if (fields[i] && fields[i].trim()) {
            documentText += `${header[i]}: ${fields[i]}\n`;
          }
        }

        // Skip empty rows
        if (!documentText.trim()) {
          console.warn(`Skipping empty CSV row ${rowIdx + 1}`);
          continue;
        }

        console.log(`Processing row ${rowIdx + 1} as document: ${documentText.substring(0, 100)}...`);

        // Extract triples from this row's text using the existing extraction function
        try {
          console.log(`🔄 Calling extractTriplesFromChunk for row ${rowIdx + 1}`);
          // Note: promptConfigs is not available in this scope, so we'll pass undefined for now
          const rowTriples = await extractTriplesFromChunk(documentText, undefined);

          console.log(`📥 extractTriplesFromChunk returned:`, rowTriples);

          if (rowTriples && Array.isArray(rowTriples)) {
            console.log(`✅ Extracted ${rowTriples.length} triples from row ${rowIdx + 1}`);
            allTriples = allTriples.concat(rowTriples);
            currentBatch = currentBatch.concat(rowTriples);

            // Store batch every BATCH_SIZE rows or on last row
            if (currentBatch.length >= BATCH_SIZE || rowIdx === dataRows.length - 1) {
              try {
                console.log(`💾 Storing batch: ${currentBatch.length} triples (rows ${storedTriples + 1}-${rowIdx + 1})`);

                // Store batch to database via API
                const batchResponse = await fetch('/api/graph-db/triples', {
                  method: 'POST',
                  headers: { 'Content-Type': 'application/json' },
                  body: JSON.stringify({
                    triples: currentBatch,
                    source: `CSV batch ${Math.floor(storedTriples / BATCH_SIZE) + 1}`
                  })
                });

                if (batchResponse.ok) {
                  storedTriples += currentBatch.length;
                  console.log(`✅ Batch stored successfully! Progress: ${storedTriples} total triples stored`);
                } else {
                  console.error(`❌ Failed to store batch: ${batchResponse.statusText}`);
                  // Continue processing even if storage fails
                }

                currentBatch = []; // Reset batch
              } catch (batchError) {
                console.error(`❌ Error storing batch at row ${rowIdx + 1}:`, batchError);
                // Continue processing even if one batch fails
              }
            }
          } else {
            console.warn(`⚠️ No valid triples returned for row ${rowIdx + 1}`);
          }
        } catch (error) {
          console.error(`❌ Error extracting triples from row ${rowIdx + 1}:`, error);
          continue;
        }

      } catch (parseError) {
        console.error(`Error parsing CSV row ${rowIdx + 1}:`, parseError);
        continue;
      }
    }

    console.log(`🏁 Successfully extracted ${allTriples.length} triples from ${dataRows.length} CSV rows`);
    console.log('Final triples array:', allTriples);
    return allTriples;
  }

  // Helper function to convert JSON content to readable text format
  const convertJsonToText = (jsonContent: string): string => {
    try {
      // Parse the JSON to validate it
      const jsonData = JSON.parse(jsonContent);

      // Convert JSON to a readable text format that preserves structure and relationships
      const formatJsonObject = (obj: any, indent: number = 0): string => {
        const spaces = '  '.repeat(indent);

        if (obj === null || obj === undefined) {
          return 'null';
        }

        if (typeof obj === 'string' || typeof obj === 'number' || typeof obj === 'boolean') {
          return String(obj);
        }

        if (Array.isArray(obj)) {
          if (obj.length === 0) return '[]';
          const items = obj.map((item, index) =>
            `${spaces}  Item ${index + 1}: ${formatJsonObject(item, indent + 1)}`
          ).join('\n');
          return `[\n${items}\n${spaces}]`;
        }

        if (typeof obj === 'object') {
          const entries = Object.entries(obj);
          if (entries.length === 0) return '{}';

          const props = entries.map(([key, value]) =>
            `${spaces}  ${key}: ${formatJsonObject(value, indent + 1)}`
          ).join('\n');
          return `{\n${props}\n${spaces}}`;
        }

        return String(obj);
      };

      // Create a descriptive text representation
      let textContent = `JSON Document Content:\n\n`;
      textContent += formatJsonObject(jsonData);

      return textContent;
    } catch (error) {
      console.warn('Failed to parse JSON, treating as plain text:', error);
      // If JSON parsing fails, return the original content as-is
      return jsonContent;
    }
  }

  const openGraphVisualization = async (documentId?: string) => {
    // Find the document to visualize
    const doc = documentId
      ? documents.find((d) => d.id === documentId && d.status === "Processed" && d.triples && d.triples.length > 0)
      : documents.find((d) => d.status === "Processed" && d.triples && d.triples.length > 0)

    if (!doc || !doc.triples) {
      console.warn("No suitable document found for graph visualization")
      return
    }

    try {
      // Create a timestamp to ensure we have unique localStorage keys that don't conflict
      const timestamp = Date.now();

      // Always store in localStorage as a backup with a timestamp suffix
      try {
        // Store with both the old keys (for backward compatibility) and new timestamped keys
        localStorage.setItem("graphTriples", JSON.stringify(doc.triples))
        localStorage.setItem("graphDocumentName", doc.name)

        // Also store with timestamp for uniqueness
        localStorage.setItem(`graphTriples_${timestamp}`, JSON.stringify(doc.triples))
        localStorage.setItem(`graphDocumentName_${timestamp}`, doc.name)

        console.log(`Stored ${doc.triples.length} triples in localStorage for document: ${doc.name}`)
      } catch (localStorageError) {
        console.error("LocalStorage error:", localStorageError);
        alert("Warning: Unable to save graph data to browser storage. The graph may not persist if you navigate away.");
        // Continue with API storage even if localStorage fails
      }

      // Try the API approach
      try {
        const response = await fetch("/api/graph-data", {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({
            triples: doc.triples,
            documentName: doc.name,
            timestamp // Include timestamp for correlation
          }),
        })

        if (response.ok) {
          const { graphId } = await response.json()
          console.log(`Successfully stored graph data with ID: ${graphId}`)
          // Use Next.js router.replace to avoid building up history stack
          router.replace(`/graph?id=${graphId}&ts=${timestamp}`)
        } else {
          console.warn(`API storage failed (${response.status}): ${await response.text()}`)
          // If API fails, use localStorage fallback with timestamp parameter
          router.replace(`/graph?source=local&ts=${timestamp}`)
        }
      } catch (apiError) {
        console.error("Error with API storage:", apiError)
        // Navigate using localStorage fallback with timestamp
        router.replace(`/graph?source=local&ts=${timestamp}`)
      }
    } catch (error) {
      console.error("Error preparing graph data:", error)
      alert("Failed to prepare graph data. See console for details.")
    }
  }

  const generateEmbeddings = async (documentId: string) => {
    // Add more detailed diagnostics
    const doc = documents.find(d => d.id === documentId);

    if (!doc) {
      toast({
        title: "Document Not Found",
        description: `Could not find document with ID: ${documentId}`,
        variant: "destructive",
        duration: 3000,
      });
      return;
    }

    // If content already exists, use it right away
    if (doc.content && doc.content.trim() !== '') {
      await processEmbeddings(doc.id, doc.name, doc.content);
      return;
    }

    // Document exists but content is not loaded - log debug info
    console.log(`Attempting to load content for document: ${doc.name}`);
    console.log(`File info: size=${doc.file.size}, type=${doc.file.type}`);

    // Check if the document was loaded from localStorage and might have a corrupted file reference
    const isLikelyFromLocalStorage = doc.file.size === 0 || !(doc.file instanceof Blob);

    if (isLikelyFromLocalStorage) {
      toast({
        title: "File Reference Issue",
        description: "This document was restored from browser storage and cannot access its original file. Please re-upload the file or process it again first.",
        variant: "destructive",
        duration: 5000,
      });
      return;
    }

    try {
      // Document exists but content might not be loaded - try to load it
      const content = await readFileContent(doc.file);
      if (content && content.trim() !== '') {
        // Update the document with content first
        setDocuments(prevDocs =>
          prevDocs.map(d => {
            if (d.id === documentId) {
              return {
                ...d,
                content: content
              };
            }
            return d;
          })
        );

        // Continue with the loaded content
        await processEmbeddings(doc.id, doc.name, content);
      } else {
        toast({
          title: "Empty Document",
          description: "The document file appears to be empty",
          variant: "destructive",
          duration: 3000,
        });
      }
    } catch (error) {
      toast({
        title: "Content Loading Error",
        description: `Failed to load document content: ${error instanceof Error ? error.message : String(error)}`,
        variant: "destructive",
        duration: 5000,
      });
    }
  };

  // Helper function to handle the actual embeddings processing
  const processEmbeddings = async (documentId: string, documentName: string, content: string) => {
    setIsGeneratingEmbeddings(true);
    try {
      console.log(`Generating embeddings for document: ${documentName}`);

      // Update embeddings status to show it's processing, without changing main document status
      setDocuments(prevDocs =>
        prevDocs.map(d => {
          if (d.id === documentId) {
            return {
              ...d,
              embeddings: {
                count: d.embeddings?.count || 0,
                generated: d.embeddings?.generated || new Date(),
                status: "Processing" as const
              }
            };
          }
          return d;
        })
      );

      const response = await fetch('/api/embeddings', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify({
          documentId: documentId,
          content: content,
          documentName: documentName
        })
      });

      if (!response.ok) {
        throw new Error(`Failed to generate embeddings: ${await response.text()}`);
      }

      const result = await response.json();
      console.log('Embeddings generation result:', result);

      // Update embeddings status to show it's processed
      setDocuments(prevDocs =>
        prevDocs.map(d => {
          if (d.id === documentId) {
            return {
              ...d,
              embeddings: {
                count: result.embeddings,
                generated: new Date(),
                status: "Processed" as const
              }
            };
          }
          return d;
        })
      );

      // Show a toast notification
      toast({
        title: "Embeddings Generated",
        description: `Successfully generated ${result.embeddings} embeddings for "${documentName}"`,
        duration: 5000,
      });

    } catch (error) {
      console.error('Error generating embeddings:', error);

      // Update embeddings status to show there was an error
      setDocuments(prevDocs =>
        prevDocs.map(d => {
          if (d.id === documentId) {
            return {
              ...d,
              embeddings: {
                count: d.embeddings?.count || 0,
                generated: d.embeddings?.generated || new Date(),
                status: "Error" as const,
                error: error instanceof Error ? error.message : String(error)
              }
            };
          }
          return d;
        })
      );

      toast({
        title: "Embeddings Generation Failed",
        description: `Failed to generate embeddings: ${error instanceof Error ? error.message : String(error)}`,
        variant: "destructive",
        duration: 5000,
      });
    } finally {
      setIsGeneratingEmbeddings(false);
    }
  };

  return (
    <DocumentContext.Provider
      value={{
        documents,
        addDocuments,
        deleteDocuments,
        clearDocuments,
        processDocuments,
    processDocumentsLegacy,
        isProcessing,
        updateTriples,
        addTriple,
        editTriple,
        deleteTriple,
        openGraphVisualization,
        generateEmbeddings,
        isGeneratingEmbeddings
      }}
    >
      {children}
    </DocumentContext.Provider>
  )
}

export function useDocuments() {
  const context = useContext(DocumentContext)
  if (context === undefined) {
    throw new Error("useDocuments must be used within a DocumentProvider")
  }
  return context
}