dgx-spark-playbooks/nvidia/txt2kg/assets/frontend/app/api/process-document/route.ts

import { NextRequest, NextResponse } from 'next/server';
import { RemoteBackendService } from '@/lib/remote-backend';
import { EmbeddingsService } from '@/lib/embeddings';
import type { Triple } from '@/types/graph';
import { BackendService } from '@/lib/backend-service';
import { getGraphDbType } from '../settings/route';

/**
 * Type guard for a usable triple.
 *
 * @param candidate - Any value (typically an element of a parsed JSON array).
 * @returns true when `candidate` is a non-null object whose `subject`,
 *   `predicate` and `object` properties are all strings that are non-empty
 *   after trimming.
 */
function isValidTriple(candidate: unknown): candidate is Triple {
  if (typeof candidate !== 'object' || candidate === null) {
    return false;
  }
  const { subject, predicate, object } = candidate as Record<string, unknown>;
  return [subject, predicate, object].every(
    (part) => typeof part === 'string' && part.trim() !== ''
  );
}

/**
 * POSTs `requestBody` to `${baseUrl}/api/extract-triples` and returns the
 * valid triples found in the `triples` field of the JSON response.
 *
 * @param baseUrl - Origin (scheme + host + port) to call; must be absolute.
 * @param requestBody - JSON-serialisable payload forwarded to the endpoint.
 * @returns The entries of `response.triples` that pass `isValidTriple`;
 *   an empty array when the field is missing, not an array, or has no valid entries.
 * @throws Error when the endpoint answers with a non-2xx status; also
 *   propagates network / JSON-parse failures from `fetch`.
 */
async function extractTriplesWithLangChain(
  baseUrl: string,
  requestBody: Record<string, unknown>
): Promise<Triple[]> {
  const langchainResponse = await fetch(`${baseUrl}/api/extract-triples`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(requestBody)
  });

  if (!langchainResponse.ok) {
    const errorText = await langchainResponse.text();
    console.error(`LangChain API error: ${langchainResponse.status} ${langchainResponse.statusText}`, errorText);
    throw new Error(`LangChain extraction failed: ${langchainResponse.statusText} (${langchainResponse.status})`);
  }

  const langchainResult: unknown = await langchainResponse.json();
  const extracted = (langchainResult as { triples?: unknown } | null)?.triples;
  // Apply the same validation used for caller-supplied triples so malformed
  // entries never reach the response.
  return Array.isArray(extracted) ? extracted.filter(isValidTriple) : [];
}

/**
 * API endpoint for processing a document's triples.
 * POST /api/process-document
 *
 * Expects a JSON object with:
 *  - `text` (non-empty string, required)
 *  - `triples` (array, required) - entries failing `isValidTriple` are dropped
 *  - `filename` (string, optional) - non-string values are ignored
 *  - `useLangChain`, `useGraphTransformer` (optional flags)
 *  - `systemPrompt`, `extractionPrompt`, `graphTransformerPrompt` (optional)
 *
 * When `useLangChain` is truthy and the file is not a `.csv`, the triples are
 * re-extracted through /api/extract-triples; on any failure or an empty result
 * the validated caller-supplied triples are used instead.
 *
 * This handler does not generate embeddings or write to the graph database;
 * it only validates / extracts triples and echoes them back.
 *
 * Responses: 200 with the processed triples, 400 for an unparsable body or
 * missing `text` / `triples`, 500 for unexpected failures.
 */
export async function POST(req: NextRequest) {
  try {
    // Parse request body. A body that is not valid JSON is a client error,
    // so report it as 400 instead of letting it fall through to the 500 path.
    let body: unknown;
    try {
      body = await req.json();
    } catch (parseError) {
      console.error('Error parsing request body:', parseError);
      return NextResponse.json({ error: 'Request body must be valid JSON' }, { status: 400 });
    }

    // A JSON `null` (or primitive) body has no fields; treat it as an empty
    // object so the field validation below answers with a 400.
    const payload = (typeof body === 'object' && body !== null ? body : {}) as Record<string, unknown>;
    const {
      text,
      filename: rawFilename,
      triples,
      useLangChain,
      useGraphTransformer,
      systemPrompt,
      extractionPrompt,
      graphTransformerPrompt
    } = payload;

    if (!text || typeof text !== 'string') {
      return NextResponse.json({ error: 'Text is required' }, { status: 400 });
    }

    if (!triples || !Array.isArray(triples)) {
      return NextResponse.json({ error: 'Triples are required' }, { status: 400 });
    }

    // Only a string can be a filename; anything else is treated as absent so
    // the extension checks below cannot throw.
    const filename = typeof rawFilename === 'string' ? rawFilename : undefined;
    const lowerCaseName = filename?.toLowerCase() ?? '';
    const isCSVFile = lowerCaseName.endsWith('.csv');
    const isJSONFile = lowerCaseName.endsWith('.json');
    const customPromptsUsed = !!(systemPrompt || extractionPrompt || graphTransformerPrompt);

    console.log(`🔍 API: Processing document "${filename || 'unnamed'}" (${text.length} chars)`);
    console.log(`🔍 API: Processing ${triples.length} triples`);
    console.log(`🔍 API: Using LangChain for triple extraction: ${useLangChain ? 'Yes' : 'No'}`);
    console.log(`🔍 API: First few triples:`, triples.slice(0, 3));
    if (useLangChain) {
      console.log(`Using LLMGraphTransformer: ${useGraphTransformer ? 'Yes' : 'No'}`);
    }

    // Log if custom prompts are being used
    if (customPromptsUsed) {
      console.log('Using custom prompts for extraction');
      if (systemPrompt) console.log('Custom system prompt provided');
      if (extractionPrompt) console.log('Custom extraction prompt provided');
      if (graphTransformerPrompt) console.log('Custom graph transformer prompt provided');
    }

    // Filter triples to ensure they are valid
    const validTriples: Triple[] = triples.filter(isValidTriple);
    console.log(`Found ${validTriples.length} valid triples`);

    // Default to the caller-supplied triples; LangChain output replaces them
    // only when extraction succeeds and yields at least one valid triple.
    let triplesForProcessing: Triple[] = validTriples;

    if (useLangChain && !isCSVFile) {
      try {
        console.log('Using LangChain for native triple extraction...');
        // Server-side fetch needs an absolute URL, so reuse the request origin
        const baseUrl = new URL(req.url).origin;
        console.log(`Using base URL: ${baseUrl} for LangChain API call`);

        const requestBody: Record<string, unknown> = {
          text,
          useLangChain: true,
          useGraphTransformer
        };

        // Add custom prompts if available
        if (systemPrompt) requestBody.systemPrompt = systemPrompt;
        if (extractionPrompt) requestBody.extractionPrompt = extractionPrompt;
        if (graphTransformerPrompt) requestBody.graphTransformerPrompt = graphTransformerPrompt;

        const extractedTriples = await extractTriplesWithLangChain(baseUrl, requestBody);
        if (extractedTriples.length > 0) {
          console.log(`Successfully extracted ${extractedTriples.length} triples using LangChain${useGraphTransformer ? ' with GraphTransformer' : ''}`);
          triplesForProcessing = extractedTriples;
        } else {
          console.warn('LangChain extraction returned no triples, falling back to provided triples');
        }
      } catch (langchainError) {
        // Deliberate best-effort: extraction problems must not fail the request.
        console.error('Error using LangChain for triple extraction:', langchainError);
        console.log('Falling back to provided triples');
      }
    }

    if (isCSVFile) {
      console.log('CSV file detected, skipping text processor');
      // NOTE: Neo4j storage is no longer done automatically
      // This is now handled manually through the "Store in Graph DB" button in the UI
    } else if (isJSONFile) {
      console.log('JSON file detected, processed as unstructured text document - embeddings can be generated manually via the UI');
      // NOTE: Automatic embeddings generation has been disabled for JSON files.
      // Embeddings are now generated only when explicitly requested through the "Generate Embeddings" button in the UI.
    } else {
      // Regular text processing flow - no automatic embeddings generation
      console.log('Document processed successfully - embeddings can be generated manually via the UI');
      // NOTE: Automatic embeddings generation has been disabled.
      // Embeddings are now generated only when explicitly requested through the "Generate Embeddings" button in the UI.
    }

    // Return success response.
    // `langchainUsed` / `graphTransformerUsed` echo the request flags; they do
    // not indicate whether the LangChain extraction actually succeeded.
    return NextResponse.json({
      success: true,
      message: 'Document processed successfully',
      tripleCount: triplesForProcessing.length,
      triples: triplesForProcessing,
      documentName: filename || 'unnamed',
      langchainUsed: useLangChain,
      graphTransformerUsed: useGraphTransformer,
      customPromptsUsed,
      graphDbType: getGraphDbType()
    });
  } catch (error) {
    console.error('Error processing document:', error);
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    return NextResponse.json(
      { error: `Failed to process document: ${errorMessage}` },
      { status: 500 }
    );
  }
}
chore: Regenerate all playbooks 2025-10-06 17:05:41 +00:00			`import { NextRequest, NextResponse } from 'next/server';`
			`import { RemoteBackendService } from '@/lib/remote-backend';`
			`import { EmbeddingsService } from '@/lib/embeddings';`
			`import type { Triple } from '@/types/graph';`
			`import { BackendService } from '@/lib/backend-service';`
			`import { getGraphDbType } from '../settings/route';`

			`/**`
			`* API endpoint for processing documents with LangChain, generating embeddings,`
			`* and storing in the knowledge graph`
			`* POST /api/process-document`
			`*/`
			`export async function POST(req: NextRequest) {`
			`try {`
			`// Parse request body`
			`const body = await req.json();`
			`const {`
			`text,`
			`filename,`
			`triples,`
			`useLangChain,`
			`useGraphTransformer,`
			`systemPrompt,`
			`extractionPrompt,`
			`graphTransformerPrompt`
			`} = body;`

			`if (!text \|\| typeof text !== 'string') {`
			`return NextResponse.json({ error: 'Text is required' }, { status: 400 });`
			`}`

			`if (!triples \|\| !Array.isArray(triples)) {`
			`return NextResponse.json({ error: 'Triples are required' }, { status: 400 });`
			`}`

			`// Initialize services`
			`const backendService = RemoteBackendService.getInstance();`
			`const embeddingsService = EmbeddingsService.getInstance();`

			console.log(`🔍 API: Processing document "${filename \|\| 'unnamed'}" (${text.length} chars)`);
			console.log(`🔍 API: Processing ${triples.length} triples`);
			console.log(`🔍 API: Using LangChain for triple extraction: ${useLangChain ? 'Yes' : 'No'}`);
			console.log(`🔍 API: First few triples:`, triples.slice(0, 3));
			`if (useLangChain) {`
			console.log(`Using LLMGraphTransformer: ${useGraphTransformer ? 'Yes' : 'No'}`);
			`}`

			`// Log if custom prompts are being used`
			`if (systemPrompt \|\| extractionPrompt \|\| graphTransformerPrompt) {`
			`console.log('Using custom prompts for extraction');`
			`if (systemPrompt) console.log('Custom system prompt provided');`
			`if (extractionPrompt) console.log('Custom extraction prompt provided');`
			`if (graphTransformerPrompt) console.log('Custom graph transformer prompt provided');`
			`}`

			`// Filter triples to ensure they are valid`
			`const validTriples = triples.filter((triple: any) => {`
			`return (`
			`triple &&`
			`typeof triple.subject === 'string' && triple.subject.trim() !== '' &&`
			`typeof triple.predicate === 'string' && triple.predicate.trim() !== '' &&`
			`typeof triple.object === 'string' && triple.object.trim() !== ''`
			`);`
			`}) as Triple[];`

			console.log(`Found ${validTriples.length} valid triples`);

			`// If useLangChain flag is set, we'll extract triples using the LangChain route`
			`let triplesForProcessing = validTriples;`

			`if (useLangChain && !filename?.toLowerCase().endsWith('.csv')) {`
			`try {`
			`console.log('Using LangChain for native triple extraction...');`
			`// Use absolute URL with origin from request to fix URL parsing error`
			`const baseUrl = new URL(req.url).origin;`
			console.log(`Using base URL: ${baseUrl} for LangChain API call`);

			`// Call the extract-triples endpoint with useLangChain flag and custom prompts`
			`const requestBody: any = {`
			`text,`
			`useLangChain: true,`
			`useGraphTransformer`
			`};`

			`// Add custom prompts if available`
			`if (systemPrompt) requestBody.systemPrompt = systemPrompt;`
			`if (extractionPrompt) requestBody.extractionPrompt = extractionPrompt;`
			`if (graphTransformerPrompt) requestBody.graphTransformerPrompt = graphTransformerPrompt;`

			const langchainResponse = await fetch(`${baseUrl}/api/extract-triples`, {
			`method: 'POST',`
			`headers: { 'Content-Type': 'application/json' },`
			`body: JSON.stringify(requestBody)`
			`});`

			`if (!langchainResponse.ok) {`
			`const errorText = await langchainResponse.text();`
			console.error(`LangChain API error: ${langchainResponse.status} ${langchainResponse.statusText}`, errorText);
			throw new Error(`LangChain extraction failed: ${langchainResponse.statusText} (${langchainResponse.status})`);
			`}`

			`const langchainResult = await langchainResponse.json();`
			`if (langchainResult.triples && Array.isArray(langchainResult.triples) && langchainResult.triples.length > 0) {`
			console.log(`Successfully extracted ${langchainResult.triples.length} triples using LangChain${useGraphTransformer ? ' with GraphTransformer' : ''}`);
			`triplesForProcessing = langchainResult.triples;`
			`} else {`
			`console.warn('LangChain extraction returned no triples, falling back to provided triples');`
			`}`
			`} catch (langchainError) {`
			`console.error('Error using LangChain for triple extraction:', langchainError);`
			`console.log('Falling back to provided triples');`
			`}`
			`}`

			`// Check if this is a CSV file - if so, skip processing`
			`const isCSVFile = filename && filename.toLowerCase().endsWith('.csv');`
			`const isJSONFile = filename && filename.toLowerCase().endsWith('.json');`

			`if (isCSVFile) {`
			`console.log('CSV file detected, skipping text processor');`
			`// NOTE: Neo4j storage is no longer done automatically`
			`// This is now handled manually through the "Store in Graph DB" button in the UI`
			`} else if (isJSONFile) {`
			`console.log('JSON file detected, processed as unstructured text document - embeddings can be generated manually via the UI');`
			`// NOTE: Automatic embeddings generation has been disabled for JSON files.`
			`// Embeddings are now generated only when explicitly requested through the "Generate Embeddings" button in the UI.`
			`} else {`
			`// Regular text processing flow - no automatic embeddings generation`
			`console.log('Document processed successfully - embeddings can be generated manually via the UI');`
			`// NOTE: Automatic embeddings generation has been disabled.`
			`// Embeddings are now generated only when explicitly requested through the "Generate Embeddings" button in the UI.`
			`}`

			`// Return success response`
			`return NextResponse.json({`
			`success: true,`
			`message: 'Document processed successfully',`
			`tripleCount: triplesForProcessing.length,`
			`triples: triplesForProcessing,`
			`documentName: filename \|\| 'unnamed',`
			`langchainUsed: useLangChain,`
			`graphTransformerUsed: useGraphTransformer,`
			`customPromptsUsed: !!(systemPrompt \|\| extractionPrompt \|\| graphTransformerPrompt),`
			`graphDbType: getGraphDbType()`
			`});`
			`} catch (error) {`
			`console.error('Error processing document:', error);`
			`const errorMessage = error instanceof Error ? error.message : 'Unknown error';`
			`return NextResponse.json(`
			{ error: `Failed to process document: ${errorMessage}` },
			`{ status: 500 }`
			`);`
			`}`
			`}`