dgx-spark-playbooks/nvidia/multi-agent-chatbot/assets/backend/utils.py

#
# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Utility functions for file processing and message conversion."""

import json
import os
import time
from typing import List, Dict, Any

from langchain_core.messages import HumanMessage, AIMessage, ToolMessage, ToolCall

from logger import logger
from vector_store import VectorStore


async def process_and_ingest_files_background(
    file_info: List[dict],
    vector_store: VectorStore,
    config_manager,
    task_id: str,
    indexing_tasks: Dict[str, str]
) -> None:
    """Process and ingest files in the background.

    Args:
        file_info: List of file dictionaries with 'filename' and 'content' keys
        vector_store: VectorStore instance for document indexing
        config_manager: ConfigManager instance for updating sources
        task_id: Unique identifier for this processing task
        indexing_tasks: Dictionary to track task status
    """
    try:
        logger.debug({
            "message": "Starting background file processing",
            "task_id": task_id,
            "file_count": len(file_info)
        })

        indexing_tasks[task_id] = "saving_files"

        permanent_dir = os.path.join("uploads", task_id)
        os.makedirs(permanent_dir, exist_ok=True)

        file_paths = []
        file_names = []

        for info in file_info:
            try:
                file_name = info["filename"]
                content = info["content"]

                file_path = os.path.join(permanent_dir, file_name)
                with open(file_path, "wb") as f:
                    f.write(content)

                file_paths.append(file_path)
                file_names.append(file_name)

                logger.debug({
                    "message": "Saved file",
                    "task_id": task_id,
                    "filename": file_name,
                    "path": file_path
                })
            except Exception as e:
                logger.error({
                    "message": f"Error saving file {info['filename']}",
                    "task_id": task_id,
                    "filename": info['filename'],
                    "error": str(e)
                }, exc_info=True)

        indexing_tasks[task_id] = "loading_documents"
        logger.debug({"message": "Loading documents", "task_id": task_id})

        try:
            documents = vector_store._load_documents(file_paths)

            logger.debug({
                "message": "Documents loaded, starting indexing",
                "task_id": task_id,
                "document_count": len(documents)
            })

            indexing_tasks[task_id] = "indexing_documents"
            vector_store.index_documents(documents)

            if file_names:
                config = config_manager.read_config()

                config_updated = False
                for file_name in file_names:
                    if file_name not in config.sources:
                        config.sources.append(file_name)
                        config_updated = True

                if config_updated:
                    config_manager.write_config(config)
                    logger.debug({
                        "message": "Updated config with new sources",
                        "task_id": task_id,
                        "sources": config.sources
                    })

            indexing_tasks[task_id] = "completed"
            logger.debug({
                "message": "Background processing and indexing completed successfully",
                "task_id": task_id
            })
        except Exception as e:
            indexing_tasks[task_id] = f"failed_during_indexing: {str(e)}"
            logger.error({
                "message": "Error during document loading or indexing",
                "task_id": task_id,
                "error": str(e)
            }, exc_info=True)

    except Exception as e:
        indexing_tasks[task_id] = f"failed: {str(e)}"
        logger.error({
            "message": "Error in background processing",
            "task_id": task_id,
            "error": str(e)
        }, exc_info=True)


def convert_langgraph_messages_to_openai(messages: List) -> List[Dict[str, Any]]:
    """Convert LangGraph message objects to OpenAI API format.

    Args:
        messages: List of LangGraph message objects

    Returns:
        List of dictionaries in OpenAI API format
    """
    openai_messages = []

    for msg in messages:
        if isinstance(msg, HumanMessage):
            openai_messages.append({
                "role": "user",
                "content": msg.content
            })
        elif isinstance(msg, AIMessage):
            openai_msg = {
                "role": "assistant",
                "content": msg.content or ""
            }
            if hasattr(msg, 'tool_calls') and msg.tool_calls:
                openai_msg["tool_calls"] = []
                for tc in msg.tool_calls:
                    openai_msg["tool_calls"].append({
                        "id": tc["id"],
                        "type": "function",
                        "function": {
                            "name": tc["name"],
                            "arguments": json.dumps(tc["args"])
                        }
                    })
            openai_messages.append(openai_msg)
        elif isinstance(msg, ToolMessage):
            openai_messages.append({
                "role": "tool",
                "content": msg.content,
                "tool_call_id": msg.tool_call_id
            })

    return openai_messages