mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 18:33:54 +00:00
154 lines
6.7 KiB
TypeScript
154 lines
6.7 KiB
TypeScript
|
|
import { NextRequest, NextResponse } from 'next/server';
|
||
|
|
import { RemoteBackendService } from '@/lib/remote-backend';
|
||
|
|
import { EmbeddingsService } from '@/lib/embeddings';
|
||
|
|
import type { Triple } from '@/types/graph';
|
||
|
|
import { BackendService } from '@/lib/backend-service';
|
||
|
|
import { getGraphDbType } from '../settings/route';
|
||
|
|
|
||
|
|
/**
 * Returns true when `value` is a string containing at least one
 * non-whitespace character.
 */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value.trim() !== '';
}

/**
 * Type guard for a usable triple: an object whose `subject`, `predicate`
 * and `object` are all non-empty strings.
 */
function isValidTriple(candidate: unknown): candidate is Triple {
  if (typeof candidate !== 'object' || candidate === null) {
    return false;
  }
  const { subject, predicate, object } = candidate as Record<string, unknown>;
  return isNonEmptyString(subject) && isNonEmptyString(predicate) && isNonEmptyString(object);
}

/**
 * API endpoint for processing a document and its triples.
 * POST /api/process-document
 *
 * Request body (JSON):
 * - `text` (required, non-empty string): the document text.
 * - `triples` (required, array): candidate triples; entries that are not
 *   objects with non-empty string `subject`/`predicate`/`object` are dropped.
 * - `filename` (optional string): used for logging and `.csv` / `.json`
 *   detection. A non-string value is treated as absent.
 * - `useLangChain`, `useGraphTransformer` (optional flags) and
 *   `systemPrompt`, `extractionPrompt`, `graphTransformerPrompt` (optional):
 *   when `useLangChain` is truthy and the file is not a CSV, the text is sent
 *   to `/api/extract-triples` and any triples it returns replace the
 *   provided ones. A failure there falls back to the provided triples.
 *
 * Embedding generation and graph storage are NOT triggered here; per the
 * notes in the body they are started manually from the UI.
 *
 * @returns 200 with `{ success, message, tripleCount, triples, documentName,
 *   langchainUsed, graphTransformerUsed, customPromptsUsed, graphDbType }`;
 *   400 when the body is not valid JSON or `text` / `triples` are missing;
 *   500 on any other failure.
 */
export async function POST(req: NextRequest) {
  try {
    // Parse request body. Malformed JSON is a client error, not a server error.
    let body: unknown;
    try {
      body = await req.json();
    } catch (parseError) {
      if (parseError instanceof SyntaxError) {
        return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
      }
      throw parseError;
    }

    // A JSON body of `null` or a primitive has no fields to read; treat it as
    // an empty object so the normal "required field" checks reject it.
    const payload: Record<string, unknown> =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {};

    const {
      text,
      filename,
      triples,
      useLangChain,
      useGraphTransformer,
      systemPrompt,
      extractionPrompt,
      graphTransformerPrompt
    } = payload;

    if (!text || typeof text !== 'string') {
      return NextResponse.json({ error: 'Text is required' }, { status: 400 });
    }

    if (!triples || !Array.isArray(triples)) {
      return NextResponse.json({ error: 'Triples are required' }, { status: 400 });
    }

    // Only a string filename can be inspected for an extension; anything else
    // is handled like a missing filename instead of crashing on toLowerCase().
    const safeFilename = typeof filename === 'string' ? filename : undefined;
    const documentName = safeFilename || 'unnamed';
    const lowerCaseFilename = safeFilename?.toLowerCase() ?? '';
    const isCSVFile = lowerCaseFilename.endsWith('.csv');
    const isJSONFile = lowerCaseFilename.endsWith('.json');

    // Initialize services
    // NOTE(review): neither instance is used below. The calls are kept in case
    // getInstance() has initialization side effects — confirm and remove if not.
    const backendService = RemoteBackendService.getInstance();
    const embeddingsService = EmbeddingsService.getInstance();

    console.log(`🔍 API: Processing document "${documentName}" (${text.length} chars)`);
    console.log(`🔍 API: Processing ${triples.length} triples`);
    console.log(`🔍 API: Using LangChain for triple extraction: ${useLangChain ? 'Yes' : 'No'}`);
    console.log(`🔍 API: First few triples:`, triples.slice(0, 3));
    if (useLangChain) {
      console.log(`Using LLMGraphTransformer: ${useGraphTransformer ? 'Yes' : 'No'}`);
    }

    // Log if custom prompts are being used
    const customPromptsUsed = !!(systemPrompt || extractionPrompt || graphTransformerPrompt);
    if (customPromptsUsed) {
      console.log('Using custom prompts for extraction');
      if (systemPrompt) console.log('Custom system prompt provided');
      if (extractionPrompt) console.log('Custom extraction prompt provided');
      if (graphTransformerPrompt) console.log('Custom graph transformer prompt provided');
    }

    // Filter triples to ensure they are valid
    const validTriples = triples.filter(isValidTriple);

    console.log(`Found ${validTriples.length} valid triples`);

    // If useLangChain flag is set, we'll extract triples using the LangChain route
    let triplesForProcessing = validTriples;

    if (useLangChain && !isCSVFile) {
      try {
        console.log('Using LangChain for native triple extraction...');
        // Use absolute URL with origin from request to fix URL parsing error
        // NOTE(review): the target origin is taken from the incoming request
        // URL — confirm it cannot be steered to an unintended host.
        const baseUrl = new URL(req.url).origin;
        console.log(`Using base URL: ${baseUrl} for LangChain API call`);

        // Call the extract-triples endpoint with useLangChain flag and custom prompts
        const requestBody: Record<string, unknown> = {
          text,
          useLangChain: true,
          useGraphTransformer
        };

        // Add custom prompts if available
        if (systemPrompt) requestBody.systemPrompt = systemPrompt;
        if (extractionPrompt) requestBody.extractionPrompt = extractionPrompt;
        if (graphTransformerPrompt) requestBody.graphTransformerPrompt = graphTransformerPrompt;

        const langchainResponse = await fetch(`${baseUrl}/api/extract-triples`, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(requestBody)
        });

        if (!langchainResponse.ok) {
          const errorText = await langchainResponse.text();
          console.error(`LangChain API error: ${langchainResponse.status} ${langchainResponse.statusText}`, errorText);
          throw new Error(`LangChain extraction failed: ${langchainResponse.statusText} (${langchainResponse.status})`);
        }

        const langchainResult: unknown = await langchainResponse.json();
        const extractedTriples =
          typeof langchainResult === 'object' && langchainResult !== null
            ? (langchainResult as Record<string, unknown>).triples
            : undefined;

        if (Array.isArray(extractedTriples) && extractedTriples.length > 0) {
          console.log(`Successfully extracted ${extractedTriples.length} triples using LangChain${useGraphTransformer ? ' with GraphTransformer' : ''}`);
          // NOTE(review): these triples are returned as-is and are not run
          // through the validity filter applied to client-provided triples.
          triplesForProcessing = extractedTriples as Triple[];
        } else {
          console.warn('LangChain extraction returned no triples, falling back to provided triples');
        }
      } catch (langchainError) {
        // Extraction is best-effort: log and continue with the provided triples.
        console.error('Error using LangChain for triple extraction:', langchainError);
        console.log('Falling back to provided triples');
      }
    }

    // Check if this is a CSV file - if so, skip processing
    if (isCSVFile) {
      console.log('CSV file detected, skipping text processor');
      // NOTE: Neo4j storage is no longer done automatically
      // This is now handled manually through the "Store in Graph DB" button in the UI
    } else if (isJSONFile) {
      console.log('JSON file detected, processed as unstructured text document - embeddings can be generated manually via the UI');
      // NOTE: Automatic embeddings generation has been disabled for JSON files.
      // Embeddings are now generated only when explicitly requested through the "Generate Embeddings" button in the UI.
    } else {
      // Regular text processing flow - no automatic embeddings generation
      console.log('Document processed successfully - embeddings can be generated manually via the UI');
      // NOTE: Automatic embeddings generation has been disabled.
      // Embeddings are now generated only when explicitly requested through the "Generate Embeddings" button in the UI.
    }

    // Return success response
    return NextResponse.json({
      success: true,
      message: 'Document processed successfully',
      tripleCount: triplesForProcessing.length,
      triples: triplesForProcessing,
      documentName,
      langchainUsed: useLangChain,
      graphTransformerUsed: useGraphTransformer,
      customPromptsUsed,
      graphDbType: getGraphDbType()
    });
  } catch (error) {
    console.error('Error processing document:', error);
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    return NextResponse.json(
      { error: `Failed to process document: ${errorMessage}` },
      { status: 500 }
    );
  }
}
|