//
// SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
"use client" ;
import { useState } from "react" ;
import { Button } from "@/components/ui/button" ;
import { Card , CardContent , CardDescription , CardHeader , CardTitle } from "@/components/ui/card" ;
import { AlertCircle , FileText , FileUp , Loader2 , Zap } from "lucide-react" ;
import { Progress } from "@/components/ui/progress" ;
import { Alert , AlertDescription } from "@/components/ui/alert" ;
import { useToast } from "@/components/ui/use-toast" ;
import { Switch } from "@/components/ui/switch" ;
import { Label } from "@/components/ui/label" ;
import { Tabs , TabsContent , TabsList , TabsTrigger } from "@/components/ui/tabs" ;
import { useDocuments } from "@/contexts/document-context" ;
interface DocumentProcessorProps {
onComplete ? : ( results : any ) = > void ;
className? : string ;
}
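
// DocumentProcessor renders the upload card, registers files with the shared
// document context, and drives triple extraction and embedding generation.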
export function DocumentProcessor({ onComplete, className }: DocumentProcessorProps) {
  const { addDocuments, processDocuments, documents } = useDocuments();
  const [file, setFile] = useState<File | null>(null);
  const [isProcessing, setIsProcessing] = useState(false);
  const [progress, setProgress] = useState(0);
  const [processingStatus, setProcessingStatus] = useState("");
  const [error, setError] = useState<string | null>(null);
  const [processingTab, setProcessingTab] = useState<string>("triples");
  const [useSentenceChunking, setUseSentenceChunking] = useState(true);
  const [useEntityExtraction, setUseEntityExtraction] = useState(true);
  const { toast } = useToast();
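
  // Handle file selection: add the file to the document context and reset local processing state.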
  const handleFileChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const selectedFile = e.target.files?.[0] || null;
    if (selectedFile) {
      // Add file to document context for display in document list
      addDocuments([selectedFile]);
      setFile(selectedFile);
      setError(null);
      setProgress(0);
      setProcessingStatus("");

      // Show toast notification
      toast({
        title: "File Uploaded",
        description: `"${selectedFile.name}" added to document list.`,
        duration: 3000,
      });
    }
  };
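
  // Kick off processing for the selected file through the document context (LangChain-based extraction).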
  const processFile = async () => {
    if (!file) {
      setError("Please select a file to process");
      return;
    }

    try {
      setIsProcessing(true);
      setProgress(0);
      setProcessingStatus("Reading file...");
      setError(null);

      // Find the document ID for the file we're processing
      const docToProcess = documents.find(doc => doc.name === file.name);
      if (!docToProcess) {
        throw new Error("Document not found in document list");
      }

      // Use the document context to process documents with the specific ID
      await processDocuments([docToProcess.id], {
        useLangChain: true,
        useGraphTransformer: false,
        promptConfigs: undefined
      });

      setProgress(100);
      setProcessingStatus("Processing complete!");

      // Notify about completion
      toast({
        title: "Processing Complete",
        description: "Document has been processed successfully. You can now generate embeddings from the document table.",
        duration: 5000,
      });

      // Reset the file input
      setFile(null);

      // Call onComplete callback if provided
      if (onComplete) {
        onComplete({
          success: true,
          message: "Document processed successfully"
        });
      }
    } catch (err) {
      console.error("Error processing document:", err);
      setError(err instanceof Error ? err.message : "Unknown error processing document");
      toast({
        title: "Processing Failed",
        description: err instanceof Error ? err.message : "Failed to process document",
        variant: "destructive",
        duration: 5000,
      });
    } finally {
      setIsProcessing(false);
    }
  };
  // Process triples from text
  const processTriples = async (text: string, filename: string) => {
    setProcessingStatus("Extracting triples with LangChain...");
    let triples;

    // If it's a CSV file, process each row as a document with LLM extraction
    if (filename.toLowerCase().endsWith('.csv')) {
      setProcessingStatus(`Processing CSV file rows as documents (${(text.length / 1024).toFixed(2)} KB)...`);
      try {
        console.log(`🔥 DocumentProcessor: Starting CSV row-by-row processing for file: ${filename}`);
        triples = await parseCSVToTriples(text);
        console.log(`🔥 DocumentProcessor: Extracted ${triples.length} triples from CSV file`);
        setProgress(60);

        // For very large triple sets, limit what we send to the API
        const maxTriplesToProcess = 10000;
        let triplesToProcess = triples;
        if (triples.length > maxTriplesToProcess) {
          console.log(`Limiting triples to ${maxTriplesToProcess} out of ${triples.length} total`);
          setProcessingStatus(`Processing ${maxTriplesToProcess} of ${triples.length} triples (limited for performance)...`);
          triplesToProcess = triples.slice(0, maxTriplesToProcess);
        }

        setProcessingStatus(`Processing ${triplesToProcess.length} triples...`);

        // Process document to create backend with embeddings
        // NOTE: This API no longer automatically stores triples in Neo4j.
        // Storage in Neo4j is now handled manually through the UI's "Store in Graph DB" button.
        const processingResponse = await fetch('/api/process-document', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            text: `CSV file with ${triples.length} triples`, // Don't send the full CSV content
            filename,
            triples: triplesToProcess
          })
        });

        if (!processingResponse.ok) {
          throw new Error(`Failed to process document: ${processingResponse.statusText}`);
        }

        const processingData = await processingResponse.json();
        setProgress(100);
        setProcessingStatus("Processing complete!");

        // Notify about completion
        toast({
          title: "CSV Processing Complete",
          description: `Processed ${triplesToProcess.length} triples${triples.length > triplesToProcess.length ? ' (limited from ' + triples.length + ' total)' : ''} from your CSV file.`,
          duration: 5000,
        });

        // Call the onComplete callback with results
        if (onComplete) {
          onComplete({
            triples: triplesToProcess,
            totalTriples: triples.length,
            embeddings: processingData.embeddings || [],
            filename
          });
        }

        return; // Early return to skip the standard processing flow
      } catch (err) {
        console.error(`CSV processing error:`, err);
        throw new Error(`Failed to parse CSV file: ${err instanceof Error ? err.message : String(err)}`);
      }
    }
    // Standard processing for non-CSV files
    const extractResponse = await fetch('/api/extract-triples', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text })
    });

    if (!extractResponse.ok) {
      throw new Error(`Failed to extract triples: ${extractResponse.statusText}`);
    }

    const extractData = await extractResponse.json();
    triples = extractData.triples;
    setProgress(60);
    setProcessingStatus("Generating embeddings...");

    // Process document to create backend with embeddings
    // NOTE: This API no longer automatically stores triples in Neo4j.
    // Storage in Neo4j is now handled manually through the UI's "Store in Graph DB" button.
    const processingResponse = await fetch('/api/process-document', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text,
        filename,
        triples
      })
    });

    if (!processingResponse.ok) {
      throw new Error(`Failed to process document: ${processingResponse.statusText}`);
    }

    const processingData = await processingResponse.json();
    setProgress(100);
    setProcessingStatus("Processing complete!");

    // Notify about completion
    toast({
      title: "Triple Extraction Complete",
      description: `Extracted ${triples.length} triples and generated embeddings for the knowledge graph.`,
      duration: 5000,
    });

    // Call the onComplete callback with results
    if (onComplete) {
      onComplete({
        triples,
        embeddings: processingData.embeddings || [],
        filename
      });
    }
  };
  // Process sentence embeddings
  const processSentenceEmbeddings = async (text: string, filename: string) => {
    // If it's a CSV file, we need to convert it to text first
    let processableText = text;
    if (filename.toLowerCase().endsWith('.csv')) {
      setProcessingStatus("Preparing CSV data for embedding...");
      try {
        // For CSV files, we'll use the content of the cells as text to generate embeddings
        const triples = await parseCSVToTriples(text);
        // Create a text representation by joining subjects, predicates and objects
        processableText = triples
          .map(t => `${t.subject} ${t.predicate} ${t.object}`)
          .join('. ');
      } catch (err) {
        throw new Error(`Failed to process CSV file: ${err instanceof Error ? err.message : String(err)}`);
      }
    }

    setProcessingStatus("Chunking text into sentences...");

    // Call sentence embeddings API
    const embeddingsResponse = await fetch('/api/sentence-embeddings', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text: processableText,
        documentId: filename
      })
    });

    if (!embeddingsResponse.ok) {
      throw new Error(`Failed to process sentence embeddings: ${embeddingsResponse.statusText}`);
    }

    const embeddingsData = await embeddingsResponse.json();
    setProgress(100);
    setProcessingStatus("Sentence embeddings complete!");

    // Notify about completion
    toast({
      title: "Sentence Embeddings Complete",
      description: `Generated embeddings for ${embeddingsData.count} sentences from your document.`,
      duration: 5000,
    });

    // Show sample sentences in console for debugging
    console.log("Sample sentences:", embeddingsData.samples);

    // Call the onComplete callback with results
    if (onComplete) {
      onComplete({
        sentenceCount: embeddingsData.count,
        samples: embeddingsData.samples,
        filename
      });
    }
  };
  // Helper function to read file content
  const readFileContent = (file: File): Promise<string> => {
    return new Promise((resolve, reject) => {
      console.log(`Reading file: ${file.name}, size: ${(file.size / 1024).toFixed(2)} KB`);
      const reader = new FileReader();

      reader.onload = (event) => {
        if (event.target?.result) {
          const content = event.target.result as string;
          console.log(`File content loaded, length: ${content.length} characters`);

          // Special handling for CSV files
          if (file.name.toLowerCase().endsWith('.csv')) {
            try {
              console.log(`Processing CSV file content...`);
              // Don't parse here, just validate the content
              const lineCount = content.split('\n').length;
              console.log(`CSV file has ${lineCount} lines`);
              resolve(content);
            } catch (err) {
              console.error(`CSV parsing error:`, err);
              reject(new Error(`Failed to parse CSV file: ${err instanceof Error ? err.message : String(err)}`));
            }
          } else if (file.name.toLowerCase().endsWith('.json')) {
            try {
              console.log(`Processing JSON file content...`);
              // Convert JSON to readable text format for processing
              const textContent = convertJsonToText(content);
              console.log(`Converted JSON file to text format, length: ${textContent.length} characters`);
              resolve(textContent);
            } catch (err) {
              console.error(`JSON conversion error:`, err);
              reject(new Error(`Failed to process JSON file: ${err instanceof Error ? err.message : String(err)}`));
            }
          } else {
            resolve(content);
          }
        } else {
          reject(new Error("Failed to read file content"));
        }
      };

      reader.onerror = (error) => {
        console.error(`Error reading file:`, error);
        reject(new Error("Error reading file"));
      };

      reader.readAsText(file);
    });
  };
  // Helper function to convert JSON content to readable text format
  const convertJsonToText = (jsonContent: string): string => {
    try {
      // Parse the JSON to validate it
      const jsonData = JSON.parse(jsonContent);

      // Convert JSON to a readable text format that preserves structure and relationships
      const formatJsonObject = (obj: any, indent: number = 0): string => {
        const spaces = ' '.repeat(indent);

        if (obj === null || obj === undefined) {
          return 'null';
        }

        if (typeof obj === 'string' || typeof obj === 'number' || typeof obj === 'boolean') {
          return String(obj);
        }

        if (Array.isArray(obj)) {
          if (obj.length === 0) return '[]';
          const items = obj.map((item, index) =>
            `${spaces}Item ${index + 1}: ${formatJsonObject(item, indent + 1)}`
          ).join('\n');
          return `[\n${items}\n${spaces}]`;
        }

        if (typeof obj === 'object') {
          const entries = Object.entries(obj);
          if (entries.length === 0) return '{}';
          const props = entries.map(([key, value]) =>
            `${spaces}${key}: ${formatJsonObject(value, indent + 1)}`
          ).join('\n');
          return `{\n${props}\n${spaces}}`;
        }

        return String(obj);
      };

      // Create a descriptive text representation
      let textContent = `JSON Document Content:\n\n`;
      textContent += formatJsonObject(jsonData);
      return textContent;
    } catch (error) {
      console.warn('Failed to parse JSON, treating as plain text:', error);
      // If JSON parsing fails, return the original content as-is
      return jsonContent;
    }
  };
  // Parse CSV file and process each row as a document for LLM-based triple extraction
  const parseCSVToTriples = async (csvContent: string): Promise<any[]> => {
    console.log(`Processing CSV content as individual documents, length: ${csvContent.length} characters`);

    // Split the CSV content into lines
    const lines = csvContent.split('\n').filter(line => line.trim().length > 0);
    console.log(`CSV has ${lines.length} non-empty lines`);

    if (lines.length < 2) {
      throw new Error("CSV file must contain a header row and at least one data row");
    }

    // Parse the header row
    const header = lines[0].split(',').map(h => h.trim().replace(/^"(.*)"$/, '$1'));
    console.log(`CSV headers: ${header.join(', ')}`);

    // Get data rows (skip header)
    const dataRows = lines.slice(1);
    console.log(`Processing ${dataRows.length} data rows as individual documents`);

    let allTriples: any[] = [];

    // Process each row as a separate document
    for (let rowIdx = 0; rowIdx < dataRows.length; rowIdx++) {
      const line = dataRows[rowIdx];
      setProcessingStatus(`Processing CSV row ${rowIdx + 1}/${dataRows.length} with LLM...`);

      try {
        // Parse CSV row into fields
        const fields: string[] = [];
        let fieldStart = 0;
        let inQuotes = false;
        for (let i = 0; i < line.length; i++) {
          if (line[i] === '"') {
            inQuotes = !inQuotes;
          } else if (line[i] === ',' && !inQuotes) {
            fields.push(line.substring(fieldStart, i).trim().replace(/^"(.*)"$/, '$1'));
            fieldStart = i + 1;
          }
        }
        // Add the last field
        fields.push(line.substring(fieldStart).trim().replace(/^"(.*)"$/, '$1'));

        // Create document text from the row data
        let documentText = '';
        for (let i = 0; i < Math.min(header.length, fields.length); i++) {
          if (fields[i] && fields[i].trim()) {
            documentText += `${header[i]}: ${fields[i]}\n`;
          }
        }

        // Skip empty rows
        if (!documentText.trim()) {
          console.warn(`Skipping empty CSV row ${rowIdx + 1}`);
          continue;
        }

        console.log(`Processing row ${rowIdx + 1} as document: ${documentText.substring(0, 100)}...`);

        // Extract triples from this row's text using LLM
        try {
          const response = await fetch('/api/extract-triples', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              text: documentText,
              useLangChain: true // Use LLM-based extraction
            })
          });

          if (!response.ok) {
            console.error(`Failed to extract triples from row ${rowIdx + 1}: ${response.statusText}`);
            continue;
          }

          const data = await response.json();
          if (data.triples && Array.isArray(data.triples)) {
            console.log(`Extracted ${data.triples.length} triples from row ${rowIdx + 1}`);
            allTriples = allTriples.concat(data.triples);
          }
        } catch (error) {
          console.error(`Error processing row ${rowIdx + 1}:`, error);
          continue;
        }

        // Update progress
        setProgress(20 + (rowIdx / dataRows.length) * 40);
      } catch (parseError) {
        console.error(`Error parsing CSV row ${rowIdx + 1}:`, parseError);
        continue;
      }
    }

    console.log(`Successfully extracted ${allTriples.length} triples from ${dataRows.length} CSV rows`);
    return allTriples;
  };
  return (
    <Card className={className}>
      <CardHeader>
        <CardTitle>Process Document</CardTitle>
        <CardDescription>
          Extract triples from documents and build a knowledge graph
        </CardDescription>
      </CardHeader>
      <CardContent>
        <div className="space-y-4">
          <div className="flex items-start gap-4">
            <div className="grid w-full gap-2">
              <label htmlFor="document-upload" className="cursor-pointer">
                <div className="flex h-24 w-full items-center justify-center rounded-md border border-dashed border-input bg-muted/50 p-4 hover:bg-muted/80 transition-colors">
                  <div className="flex flex-col items-center gap-2">
                    <FileUp className="h-10 w-10 text-muted-foreground" />
                    <span className="text-sm font-medium text-muted-foreground">
                      {file ? file.name : "Upload document"}
                    </span>
                  </div>
                </div>
                <input
                  id="document-upload"
                  type="file"
                  accept=".md,.txt,.csv"
                  onChange={handleFileChange}
                  className="sr-only"
                />
              </label>
            </div>
          </div>

          <Tabs
            defaultValue="triples"
            value={processingTab}
            onValueChange={setProcessingTab}
            className="w-full"
          >
            <TabsList className="grid w-full grid-cols-2">
              <TabsTrigger value="triples">Knowledge Triples</TabsTrigger>
              <TabsTrigger value="embeddings">Sentence Embeddings</TabsTrigger>
            </TabsList>
            <TabsContent value="triples">
              <div className="space-y-4 py-4">
                <div className="flex items-center space-x-2">
                  <Switch
                    id="use-sentence-chunking"
                    checked={useSentenceChunking}
                    onCheckedChange={setUseSentenceChunking}
                  />
                  <Label htmlFor="use-sentence-chunking">Use sentence-level chunking</Label>
                </div>
              </div>
            </TabsContent>
            <TabsContent value="embeddings">
              <div className="py-4 text-sm text-muted-foreground">
                You can now generate embeddings directly from the document table after processing.
                <div className="flex items-center mt-2 p-2 bg-muted/30 rounded-md">
                  <Zap className="h-4 w-4 text-primary mr-2" />
                  <span>Click the lightning icon in the document table to generate embeddings</span>
                </div>
              </div>
            </TabsContent>
          </Tabs>

          {error && (
            <Alert variant="destructive">
              <AlertCircle className="h-4 w-4" />
              <AlertDescription>{error}</AlertDescription>
            </Alert>
          )}

          {isProcessing && (
            <div className="space-y-2">
              <div className="flex items-center gap-2 text-sm">
                <Loader2 className="h-4 w-4 animate-spin" />
                <span>{processingStatus}</span>
              </div>
              <Progress value={progress} className="h-2 w-full" />
            </div>
          )}

          <Button
            onClick={processFile}
            className="w-full"
            disabled={!file || isProcessing}
          >
            {isProcessing ? (
              <>
                <Loader2 className="mr-2 h-4 w-4 animate-spin" />
                Processing...
              </>
            ) : (
              <>
                <FileText className="mr-2 h-4 w-4" />
                Process Document & Generate Triples
              </>
            )}
          </Button>
        </div>
      </CardContent>
    </Card>
  );
}