#!/usr/bin/env python3 # # SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # """ Unified GPU Graph Visualization Service Combines PyGraphistry cloud processing and local GPU processing with cuGraph into a single FastAPI service for maximum flexibility. """ import os import json import numpy as np import pandas as pd from typing import Dict, List, Any, Optional, Tuple import asyncio import logging from datetime import datetime from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect, BackgroundTasks from fastapi.responses import HTMLResponse from pydantic import BaseModel import uvicorn import time from concurrent.futures import ThreadPoolExecutor import networkx as nx from enum import Enum # PyGraphistry imports import graphistry # GPU-accelerated imports (available in NVIDIA PyG container) try: import cudf import cugraph import cupy as cp from cuml import UMAP HAS_RAPIDS = True print("✓ RAPIDS cuGraph/cuDF/cuML available") except ImportError: HAS_RAPIDS = False print("⚠ RAPIDS not available, falling back to CPU") try: import torch import torch_geometric HAS_TORCH_GEOMETRIC = True print("✓ PyTorch Geometric available") except ImportError: HAS_TORCH_GEOMETRIC = False print("⚠ PyTorch Geometric not available") # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class ProcessingMode(str, Enum): PYGRAPHISTRY_CLOUD = "pygraphistry_cloud" LOCAL_GPU = "local_gpu" LOCAL_CPU = "local_cpu" class GraphPattern(str, Enum): RANDOM = "random" SCALE_FREE = "scale-free" SMALL_WORLD = "small-world" CLUSTERED = "clustered" HIERARCHICAL = "hierarchical" GRID = "grid" class GraphData(BaseModel): nodes: List[Dict[str, Any]] links: List[Dict[str, Any]] class GraphGenerationRequest(BaseModel): num_nodes: int pattern: GraphPattern = GraphPattern.SCALE_FREE avg_degree: Optional[int] = 5 num_clusters: Optional[int] = 100 small_world_k: Optional[int] = 6 small_world_p: Optional[float] = 0.1 grid_dimensions: Optional[List[int]] = [100, 100] seed: Optional[int] = None class UnifiedVisualizationRequest(BaseModel): graph_data: GraphData processing_mode: ProcessingMode = ProcessingMode.PYGRAPHISTRY_CLOUD # PyGraphistry Cloud options layout_type: Optional[str] = "force" gpu_acceleration: Optional[bool] = True clustering: Optional[bool] = False # Local GPU options layout_algorithm: Optional[str] = "force_atlas2" clustering_algorithm: Optional[str] = "leiden" compute_centrality: Optional[bool] = True class GraphGenerationStatus(BaseModel): task_id: str status: str # "running", "completed", "failed" progress: float message: str result: Optional[Dict[str, Any]] = None error: Optional[str] = None # Import graph generation classes (keeping existing code) from pygraphistry_service import LargeGraphGenerator, init_graphistry class LocalGPUProcessor: """GPU-accelerated graph processing using cuGraph""" def __init__(self): self.use_gpu = HAS_RAPIDS logger.info(f"Local GPU Processor initialized (GPU: {self.use_gpu})") def create_cugraph_from_data(self, nodes: List[Dict], edges: List[Dict]) -> Tuple['cugraph.Graph', 'cudf.DataFrame']: """Create cuGraph from node/edge data""" if not self.use_gpu: raise RuntimeError("GPU libraries not available") # Create edge dataframe edge_data = [] for edge in edges: edge_data.append({ 'src': edge['source'], 'dst': edge['target'], 'weight': edge.get('weight', 1.0) }) # Convert to cuDF edges_df = cudf.DataFrame(edge_data) # Create cuGraph G = cugraph.Graph() G.from_cudf_edgelist(edges_df, source='src', destination='dst', edge_attr='weight') return G, edges_df def compute_gpu_layout(self, G, algorithm: str = "force_atlas2") -> Dict[str, Tuple[float, float]]: """Compute GPU-accelerated graph layout""" try: if algorithm == "force_atlas2": layout_df = cugraph.force_atlas2(G) elif algorithm == "fruchterman_reingold": layout_df = cugraph.spectral_layout(G, dim=2) else: # spectral layout_df = cugraph.spectral_layout(G, dim=2) # Convert to dictionary positions = {} for _, row in layout_df.iterrows(): node_id = str(row['vertex']) positions[node_id] = (float(row['x']), float(row['y'])) logger.info(f"Computed {algorithm} layout for {len(positions)} nodes on GPU") return positions except Exception as e: logger.error(f"GPU layout computation failed: {e}") return {} def compute_gpu_clustering(self, G, algorithm: str = "leiden") -> Dict[str, int]: """Compute GPU-accelerated community detection""" try: if algorithm == "leiden": clusters_df, modularity = cugraph.leiden(G) elif algorithm == "louvain": clusters_df, modularity = cugraph.louvain(G) else: # spectral clustering clusters_df = cugraph.spectral_clustering(G, n_clusters=10) modularity = 0.0 # Convert to dictionary clusters = {} for _, row in clusters_df.iterrows(): node_id = str(row['vertex']) clusters[node_id] = int(row['partition']) logger.info(f"Computed {algorithm} clustering on GPU (modularity: {modularity:.3f})") return clusters except Exception as e: logger.error(f"GPU clustering failed: {e}") return {} def compute_gpu_centrality(self, G) -> Dict[str, Dict[str, float]]: """Compute GPU-accelerated centrality measures""" centrality_data = {} try: # PageRank pagerank_df = cugraph.pagerank(G) pagerank = {} for _, row in pagerank_df.iterrows(): pagerank[str(row['vertex'])] = float(row['pagerank']) centrality_data['pagerank'] = pagerank # Betweenness centrality (for smaller graphs) if G.number_of_vertices() < 5000: betweenness_df = cugraph.betweenness_centrality(G) betweenness = {} for _, row in betweenness_df.iterrows(): betweenness[str(row['vertex'])] = float(row['betweenness_centrality']) centrality_data['betweenness'] = betweenness logger.info(f"Computed centrality measures on GPU") return centrality_data except Exception as e: logger.error(f"GPU centrality computation failed: {e}") return {} class PyGraphistryProcessor: """PyGraphistry cloud processing (existing functionality)""" def __init__(self): self.initialized = init_graphistry() async def process_graph_data(self, request: UnifiedVisualizationRequest) -> Dict[str, Any]: """Process graph data with PyGraphistry GPU acceleration""" try: if not self.initialized: raise HTTPException(status_code=500, detail="PyGraphistry not initialized") # Convert to pandas DataFrames for PyGraphistry nodes_df = pd.DataFrame(request.graph_data.nodes) edges_df = pd.DataFrame(request.graph_data.links) # Ensure required columns exist if 'id' not in nodes_df.columns: nodes_df['id'] = nodes_df.index if 'source' not in edges_df.columns or 'target' not in edges_df.columns: raise HTTPException(status_code=400, detail="Links must have source and target columns") logger.info(f"Processing graph with {len(nodes_df)} nodes and {len(edges_df)} edges") # Create PyGraphistry graph object g = graphistry.edges(edges_df, 'source', 'target').nodes(nodes_df, 'id') # Apply GPU-accelerated processing if request.gpu_acceleration: g = await self._apply_gpu_acceleration(g, request) # Apply clustering if requested if request.clustering: g = await self._apply_clustering(g) # Generate layout g = await self._generate_layout(g, request.layout_type) # Extract processed data processed_nodes = g._nodes.to_dict('records') if g._nodes is not None else nodes_df.to_dict('records') processed_edges = g._edges.to_dict('records') if g._edges is not None else edges_df.to_dict('records') # Generate embedding URL for interactive visualization embed_url = None local_viz_data = None try: embed_url = g.plot(render=False) logger.info(f"Generated PyGraphistry embed URL: {embed_url}") except Exception as e: logger.warning(f"Could not generate embed URL (likely running in local mode): {e}") # Create local visualization data as fallback try: local_viz_data = self._create_local_viz_data(g, processed_nodes, processed_edges) logger.info("Generated local visualization data as fallback") except Exception as viz_e: logger.warning(f"Could not generate local visualization data: {viz_e}") return { "processed_nodes": processed_nodes, "processed_edges": processed_edges, "embed_url": embed_url, "local_viz_data": local_viz_data, "processing_mode": ProcessingMode.PYGRAPHISTRY_CLOUD, "stats": { "node_count": len(processed_nodes), "edge_count": len(processed_edges), "gpu_accelerated": request.gpu_acceleration, "clustered": request.clustering, "layout_type": request.layout_type, "has_embed_url": embed_url is not None, "has_local_viz": local_viz_data is not None }, "timestamp": datetime.now().isoformat() } except Exception as e: logger.error(f"Error processing graph data: {e}") raise HTTPException(status_code=500, detail=str(e)) # ... (include other PyGraphistry methods from original service) async def _apply_gpu_acceleration(self, g, request): # Implementation from original service pass async def _apply_clustering(self, g): # Implementation from original service pass async def _generate_layout(self, g, layout_type): # Implementation from original service pass def _create_local_viz_data(self, g, processed_nodes, processed_edges): # Implementation from original service pass class UnifiedGPUService: """Unified service offering both PyGraphistry cloud and local GPU processing""" def __init__(self): self.pygraphistry_processor = PyGraphistryProcessor() self.local_gpu_processor = LocalGPUProcessor() self.generation_tasks = {} self.executor = ThreadPoolExecutor(max_workers=4) self.active_connections: List[WebSocket] = [] async def process_graph(self, request: UnifiedVisualizationRequest) -> Dict[str, Any]: """Process graph with selected processing mode""" if request.processing_mode == ProcessingMode.PYGRAPHISTRY_CLOUD: return await self.pygraphistry_processor.process_graph_data(request) elif request.processing_mode == ProcessingMode.LOCAL_GPU: return await self._process_with_local_gpu(request) else: # LOCAL_CPU return await self._process_with_local_cpu(request) async def _process_with_local_gpu(self, request: UnifiedVisualizationRequest) -> Dict[str, Any]: """Process graph with local GPU acceleration""" try: nodes = request.graph_data.nodes edges = request.graph_data.links result = { "processed_nodes": nodes.copy(), "processed_edges": edges.copy(), "processing_mode": ProcessingMode.LOCAL_GPU, "gpu_processed": False, "layout_positions": {}, "clusters": {}, "centrality": {}, "stats": {}, "timestamp": datetime.now().isoformat() } if self.local_gpu_processor.use_gpu: logger.info("=== LOCAL GPU PROCESSING START ===") # Create cuGraph G, edges_df = self.local_gpu_processor.create_cugraph_from_data(nodes, edges) # Compute layout on GPU positions = self.local_gpu_processor.compute_gpu_layout(G, request.layout_algorithm) if positions: result["layout_positions"] = positions # Add positions to nodes for node in result["processed_nodes"]: node_id = str(node["id"]) if node_id in positions: node["x"], node["y"] = positions[node_id] # Compute clustering on GPU clusters = self.local_gpu_processor.compute_gpu_clustering(G, request.clustering_algorithm) if clusters: result["clusters"] = clusters # Add cluster info to nodes for node in result["processed_nodes"]: node_id = str(node["id"]) if node_id in clusters: node["cluster"] = clusters[node_id] # Compute centrality on GPU if request.compute_centrality: centrality = self.local_gpu_processor.compute_gpu_centrality(G) result["centrality"] = centrality # Add centrality to nodes for node in result["processed_nodes"]: node_id = str(node["id"]) for metric, values in centrality.items(): if node_id in values: node[metric] = values[node_id] result["gpu_processed"] = True result["stats"] = { "node_count": len(nodes), "edge_count": len(edges), "gpu_accelerated": True, "layout_computed": len(positions) > 0, "clusters_computed": len(clusters) > 0, "centrality_computed": len(centrality) > 0 } logger.info("=== LOCAL GPU PROCESSING COMPLETE ===") return result except Exception as e: logger.error(f"Local GPU processing failed: {e}") raise HTTPException(status_code=500, detail=str(e)) async def _process_with_local_cpu(self, request: UnifiedVisualizationRequest) -> Dict[str, Any]: """Process graph with local CPU (NetworkX fallback)""" # Simple CPU fallback using NetworkX nodes = request.graph_data.nodes edges = request.graph_data.links return { "processed_nodes": nodes, "processed_edges": edges, "processing_mode": ProcessingMode.LOCAL_CPU, "gpu_processed": False, "stats": { "node_count": len(nodes), "edge_count": len(edges), "gpu_accelerated": False }, "timestamp": datetime.now().isoformat() } async def broadcast_update(self, data: Dict[str, Any]): """Broadcast updates to all connected WebSocket clients""" if self.active_connections: message = json.dumps(data) for connection in self.active_connections.copy(): try: await connection.send_text(message) except WebSocketDisconnect: self.active_connections.remove(connection) # FastAPI app app = FastAPI(title="Unified GPU Graph Visualization Service", version="2.0.0") service = UnifiedGPUService() @app.post("/api/visualize") async def visualize_graph(request: UnifiedVisualizationRequest): """Process graph with unified service (supports all processing modes)""" result = await service.process_graph(request) # Broadcast to connected WebSocket clients await service.broadcast_update({ "type": "graph_processed", "data": result }) return result @app.post("/api/generate") async def generate_graph(request: GraphGenerationRequest): """Start graph generation as background task""" if request.num_nodes > 1000000: raise HTTPException(status_code=400, detail="Maximum 1 million nodes allowed") # Use existing graph generation logic task_id = f"gen_{int(time.time() * 1000)}" # Implementation would go here... return {"task_id": task_id, "status": "started"} @app.get("/api/capabilities") async def get_capabilities(): """Get GPU capabilities and available processing modes""" return { "processing_modes": { "pygraphistry_cloud": { "available": service.pygraphistry_processor.initialized, "description": "PyGraphistry cloud GPU processing with interactive embeds" }, "local_gpu": { "available": HAS_RAPIDS, "description": "Local GPU processing with cuGraph/RAPIDS" }, "local_cpu": { "available": True, "description": "Local CPU fallback processing" } }, "has_rapids": HAS_RAPIDS, "has_torch_geometric": HAS_TORCH_GEOMETRIC, "gpu_available": HAS_RAPIDS, "supported_layouts": ["force_atlas2", "spectral", "fruchterman_reingold"], "supported_clustering": ["leiden", "louvain", "spectral"] } @app.websocket("/ws") async def websocket_endpoint(websocket: WebSocket): """WebSocket endpoint for real-time updates""" await websocket.accept() service.active_connections.append(websocket) try: while True: await websocket.receive_text() except WebSocketDisconnect: service.active_connections.remove(websocket) @app.get("/api/sample-graph") async def get_sample_graph(): """Get a sample graph for testing""" return { "nodes": [ {"id": "1", "name": "Central Hub", "group": "core"}, {"id": "2", "name": "Data Source A", "group": "input"}, {"id": "3", "name": "Data Source B", "group": "input"}, {"id": "4", "name": "Processing Unit", "group": "compute"}, {"id": "5", "name": "Output A", "group": "output"}, {"id": "6", "name": "Output B", "group": "output"}, {"id": "7", "name": "Analytics", "group": "analysis"}, {"id": "8", "name": "Storage", "group": "storage"} ], "links": [ {"source": "2", "target": "1", "name": "data_feed"}, {"source": "3", "target": "1", "name": "data_feed"}, {"source": "1", "target": "4", "name": "process"}, {"source": "4", "target": "5", "name": "output"}, {"source": "4", "target": "6", "name": "output"}, {"source": "1", "target": "7", "name": "analyze"}, {"source": "1", "target": "8", "name": "store"} ] } @app.get("/api/health") async def health_check(): """Health check endpoint""" return { "status": "healthy", "pygraphistry_initialized": service.pygraphistry_processor.initialized, "local_gpu_available": HAS_RAPIDS, "torch_geometric": HAS_TORCH_GEOMETRIC, "timestamp": datetime.now().isoformat() } @app.get("/", response_class=HTMLResponse) async def get_visualization_page(): """Serve the interactive visualization page""" return """