276 lines
9.6 KiB
Python
276 lines
9.6 KiB
Python
"""
|
|
GPU Backend Manager — detects and manages compute backends.
|
|
|
|
Supports:
|
|
- CUDA via CuPy
|
|
- OpenCL via PyOpenCL (future)
|
|
- CPU via NumPy (always available)
|
|
|
|
Usage:
|
|
from app.services.gpu_backend import gpu_manager
|
|
xp = gpu_manager.get_array_module() # cupy or numpy
|
|
status = gpu_manager.get_status()
|
|
"""
|
|
|
|
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional

import numpy as np

# Module-level logger shared by everything in this module.
logger = logging.getLogger(__name__)
|
class GPUBackend(str, Enum):
    """Compute backend identifiers (str-valued so `.value` drops straight into API dicts)."""

    CUDA = "cuda"
    OPENCL = "opencl"
    CPU = "cpu"
|
|
|
|
|
|
@dataclass
class GPUDevice:
    """One detected compute device (CPU fallback, CUDA, or OpenCL)."""

    backend: GPUBackend
    # Device index *within its backend* (OpenCL indices count prior OpenCL devices only).
    index: int
    name: str
    # Total device memory in MiB; 0 for the CPU pseudo-device.
    memory_mb: int
    # Backend-specific details (e.g. CUDA runtime version, OpenCL platform name).
    extra: dict = field(default_factory=dict)
|
|
|
|
|
class GPUManager:
    """Singleton GPU manager with device detection and selection.

    Probes CUDA (via CuPy) and OpenCL (via PyOpenCL — detection only, no
    compute path yet) at construction time. A CPU/NumPy device is always
    registered as a fallback.
    """

    def __init__(self):
        # All detected devices; the CPU pseudo-device is always first.
        self._devices: list[GPUDevice] = []
        self._active_backend: GPUBackend = GPUBackend.CPU
        self._active_device: Optional[GPUDevice] = None
        # Cached cupy module when CUDA is usable, else None.
        self._cupy = None
        self._detect_devices()

    # ------------------------------------------------------------------ #
    # Detection                                                          #
    # ------------------------------------------------------------------ #

    def _detect_devices(self) -> None:
        """Probe available GPU backends and auto-select the active one."""
        # CPU is always available and serves as the fallback device.
        cpu_device = GPUDevice(
            backend=GPUBackend.CPU,
            index=0,
            name="CPU (NumPy)",
            memory_mb=0,
        )
        self._devices.append(cpu_device)

        self._probe_cuda()
        self._probe_opencl()
        self._select_backend(cpu_device)

    def _probe_cuda(self) -> None:
        """Register CUDA devices via CuPy; degrade gracefully when unavailable."""
        try:
            import cupy as cp
            device_count = cp.cuda.runtime.getDeviceCount()
            for i in range(device_count):
                props = cp.cuda.runtime.getDeviceProperties(i)
                name = props["name"]
                # Some CuPy versions return the device name as bytes.
                if isinstance(name, bytes):
                    name = name.decode()
                mem_mb = props["totalGlobalMem"] // (1024 * 1024)
                cuda_ver = cp.cuda.runtime.runtimeGetVersion()
                device = GPUDevice(
                    backend=GPUBackend.CUDA,
                    index=i,
                    name=str(name),
                    memory_mb=mem_mb,
                    extra={"cuda_version": cuda_ver},
                )
                self._devices.append(device)
                logger.info(f"[GPU] CUDA device {i}: {name} ({mem_mb} MB)")
            if device_count > 0:
                self._cupy = cp
        except ImportError:
            logger.info("[GPU] CuPy not installed — CUDA unavailable")
        except Exception as e:
            # CuPy can be installed with a broken driver/runtime; keep going on CPU.
            logger.warning(f"[GPU] CuPy probe error: {e}")

    def _probe_opencl(self) -> None:
        """Register OpenCL devices (future — detection only, never auto-selected)."""
        try:
            import pyopencl as cl
            platforms = cl.get_platforms()
            for plat in platforms:
                for dev in plat.get_devices():
                    mem_mb = dev.global_mem_size // (1024 * 1024)
                    device = GPUDevice(
                        backend=GPUBackend.OPENCL,
                        # Index counts previously registered OpenCL devices only.
                        index=len([d for d in self._devices if d.backend == GPUBackend.OPENCL]),
                        name=dev.name.strip(),
                        memory_mb=mem_mb,
                        extra={"platform": plat.name.strip()},
                    )
                    self._devices.append(device)
                    logger.info(f"[GPU] OpenCL device: {device.name} ({mem_mb} MB)")
        except ImportError:
            pass
        except Exception as e:
            logger.debug(f"[GPU] OpenCL probe error: {e}")

    def _select_backend(self, cpu_device: GPUDevice) -> None:
        """Auto-select the active backend: first CUDA device if any, else CPU.

        NOTE: OpenCL devices are reported but never auto-selected, because
        get_array_module() has no OpenCL compute path yet.
        """
        cuda_devices = [d for d in self._devices if d.backend == GPUBackend.CUDA]
        if cuda_devices:
            self._active_backend = GPUBackend.CUDA
            self._active_device = cuda_devices[0]
            logger.info(f"[GPU] Active backend: CUDA — {self._active_device.name}")
        else:
            self._active_backend = GPUBackend.CPU
            self._active_device = cpu_device
            logger.info("[GPU] Active backend: CPU (NumPy)")

    # ------------------------------------------------------------------ #
    # Compute API                                                        #
    # ------------------------------------------------------------------ #

    @property
    def gpu_available(self) -> bool:
        """True when the active backend is a real GPU (not the CPU fallback)."""
        return self._active_backend != GPUBackend.CPU

    def get_array_module(self) -> Any:
        """Return cupy (if CUDA active) or numpy."""
        if self._active_backend == GPUBackend.CUDA and self._cupy is not None:
            return self._cupy
        return np

    def to_cpu(self, arr: Any) -> np.ndarray:
        """Transfer an array to host memory as a numpy array.

        Fix: only call ``.get()`` on actual CuPy arrays. The previous
        ``hasattr(arr, 'get')`` check also matched unrelated objects such as
        dicts, which raised TypeError on ``dict.get()``.
        """
        if self._cupy is not None and isinstance(arr, self._cupy.ndarray):
            return arr.get()
        return np.asarray(arr)

    # ------------------------------------------------------------------ #
    # Status / API serialization                                         #
    # ------------------------------------------------------------------ #

    @staticmethod
    def _device_to_dict(d: GPUDevice) -> dict:
        """Serialize one device for the API (shared by get_status/get_devices)."""
        return {
            "backend": d.backend.value,
            "index": d.index,
            "name": d.name,
            "memory_mb": d.memory_mb,
        }

    def get_status(self) -> dict:
        """Full status dict for API."""
        return {
            "active_backend": self._active_backend.value,
            "active_device": (
                self._device_to_dict(self._active_device)
                if self._active_device else None
            ),
            "gpu_available": self.gpu_available,
            "available_devices": [self._device_to_dict(d) for d in self._devices],
        }

    def get_devices(self) -> list[dict]:
        """Device list for API."""
        return [self._device_to_dict(d) for d in self._devices]

    def get_diagnostics(self) -> dict:
        """Full diagnostic info for troubleshooting GPU detection.

        Collected best-effort: each probe records an ``error`` key instead of
        raising, so this is always safe to call from an API endpoint.
        """
        import sys
        import platform
        import subprocess

        # WSL detection drives the pip install hints below.
        is_wsl = "microsoft" in platform.release().lower()

        diag = {
            "python_version": sys.version,
            "python_executable": sys.executable,
            "platform": platform.platform(),
            "is_wsl": is_wsl,
            "numpy": {"version": np.__version__},
            "cuda": {},
            "opencl": {},
            "nvidia_smi": None,
            "detected_devices": len(self._devices),
            "active_backend": self._active_backend.value,
        }

        # Check nvidia-smi (works even without CuPy).
        try:
            result = subprocess.run(
                ["nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv,noheader"],
                capture_output=True, text=True, timeout=5
            )
            if result.returncode == 0 and result.stdout.strip():
                diag["nvidia_smi"] = result.stdout.strip()
        except Exception:
            diag["nvidia_smi"] = "not found or error"

        # Check CuPy/CUDA.
        try:
            import cupy as cp
            diag["cuda"]["cupy_version"] = cp.__version__
            diag["cuda"]["cuda_runtime_version"] = cp.cuda.runtime.runtimeGetVersion()
            diag["cuda"]["device_count"] = cp.cuda.runtime.getDeviceCount()
            for i in range(diag["cuda"]["device_count"]):
                props = cp.cuda.runtime.getDeviceProperties(i)
                name = props["name"]
                if isinstance(name, bytes):
                    name = name.decode()
                diag["cuda"][f"device_{i}"] = {
                    "name": str(name),
                    "memory_mb": props["totalGlobalMem"] // (1024 * 1024),
                    "compute_capability": f"{props['major']}.{props['minor']}",
                }
        except ImportError:
            diag["cuda"]["error"] = "CuPy not installed"
            if is_wsl:
                diag["cuda"]["install_hint"] = "pip3 install cupy-cuda12x --break-system-packages"
            else:
                diag["cuda"]["install_hint"] = "pip install cupy-cuda12x"
        except Exception as e:
            diag["cuda"]["error"] = str(e)

        # Check PyOpenCL.
        try:
            import pyopencl as cl
            diag["opencl"]["pyopencl_version"] = cl.VERSION_TEXT
            diag["opencl"]["platforms"] = []
            for p in cl.get_platforms():
                platform_info = {"name": p.name.strip(), "devices": []}
                for d in p.get_devices():
                    platform_info["devices"].append({
                        "name": d.name.strip(),
                        "type": cl.device_type.to_string(d.type),
                        "memory_mb": d.global_mem_size // (1024 * 1024),
                        "compute_units": d.max_compute_units,
                    })
                diag["opencl"]["platforms"].append(platform_info)
        except ImportError:
            diag["opencl"]["error"] = "PyOpenCL not installed"
            if is_wsl:
                diag["opencl"]["install_hint"] = "pip3 install pyopencl --break-system-packages"
            else:
                diag["opencl"]["install_hint"] = "pip install pyopencl"
        except Exception as e:
            diag["opencl"]["error"] = str(e)

        return diag

    # ------------------------------------------------------------------ #
    # Device selection                                                   #
    # ------------------------------------------------------------------ #

    def set_device(self, backend: str, index: int = 0) -> dict:
        """Switch active compute device.

        Args:
            backend: one of the GPUBackend values ("cuda", "opencl", "cpu").
            index: per-backend device index.

        Returns:
            {"backend": ..., "device": ...} describing the new active device.

        Raises:
            ValueError: if ``backend`` is not a valid GPUBackend value, or no
                device matches (backend, index).
        """
        target_backend = GPUBackend(backend)
        candidates = [d for d in self._devices
                      if d.backend == target_backend and d.index == index]
        if not candidates:
            raise ValueError(f"No device found: backend={backend}, index={index}")

        self._active_device = candidates[0]
        self._active_backend = target_backend

        # Bind the CUDA context to the requested device for subsequent cupy calls.
        if target_backend == GPUBackend.CUDA and self._cupy is not None:
            self._cupy.cuda.Device(index).use()

        logger.info(f"[GPU] Switched to: {self._active_device.name} ({target_backend.value})")
        return {
            "backend": self._active_backend.value,
            "device": self._active_device.name,
        }
|
|
|
|
|
|
# Module-level singleton: device probing runs once here, at import time.
gpu_manager = GPUManager()
|