@mytec: iter3.5.0 ready for testing
This commit is contained in:
35
backend/app/api/routes/gpu.py
Normal file
35
backend/app/api/routes/gpu.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""GPU management API endpoints."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.services.gpu_backend import gpu_manager
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class SetDeviceRequest(BaseModel):
    """Request body for POST /set: selects a compute backend and device index."""

    # Backend identifier string ("cuda", "opencl", "cpu"); gpu_manager.set_device
    # validates it and raises ValueError for unknown values.
    backend: str
    # Zero-based device ordinal within the chosen backend; defaults to the first.
    index: int = 0
|
||||
|
||||
|
||||
@router.get("/status")
async def gpu_status():
    """Report GPU manager state: active backend, active device, available devices."""
    status = gpu_manager.get_status()
    return status
|
||||
|
||||
|
||||
@router.get("/devices")
async def gpu_devices():
    """List every compute device the backend manager detected."""
    devices = gpu_manager.get_devices()
    return {"devices": devices}
|
||||
|
||||
|
||||
@router.post("/set")
async def gpu_set_device(request: SetDeviceRequest):
    """Switch the active compute device.

    Body: ``{"backend": "cuda"|"opencl"|"cpu", "index": 0}``.
    Returns ``{"status": "ok", ...}`` on success; responds 400 when the
    (backend, index) pair does not match any detected device.
    """
    # Keep the try body minimal: only the call that can raise ValueError.
    try:
        result = gpu_manager.set_device(request.backend, request.index)
    except ValueError as e:
        # Chain the cause so server tracebacks keep the root error (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"status": "ok", **result}
|
||||
@@ -4,7 +4,7 @@ from fastapi import FastAPI, WebSocket
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.database import connect_to_mongo, close_mongo_connection
|
||||
from app.api.routes import health, projects, terrain, coverage, regions, system
|
||||
from app.api.routes import health, projects, terrain, coverage, regions, system, gpu
|
||||
from app.api.websocket import websocket_endpoint
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ app.include_router(terrain.router, prefix="/api/terrain", tags=["terrain"])
|
||||
app.include_router(coverage.router, prefix="/api/coverage", tags=["coverage"])
|
||||
app.include_router(regions.router, prefix="/api/regions", tags=["regions"])
|
||||
app.include_router(system.router, prefix="/api/system", tags=["system"])
|
||||
app.include_router(gpu.router, prefix="/api/gpu", tags=["gpu"])
|
||||
|
||||
# WebSocket endpoint for real-time coverage with progress
|
||||
app.websocket("/ws")(websocket_endpoint)
|
||||
|
||||
@@ -247,6 +247,9 @@ class CoverageSettings(BaseModel):
|
||||
temperature_c: float = 15.0
|
||||
humidity_percent: float = 50.0
|
||||
|
||||
# Fading margin (dB) — additional safety loss subtracted from RSRP
|
||||
fading_margin: float = 0.0
|
||||
|
||||
# Preset
|
||||
preset: Optional[str] = None # fast, standard, detailed, full
|
||||
|
||||
@@ -1362,7 +1365,8 @@ class CoverageService:
|
||||
rsrp = (site.power + site.gain - path_loss - antenna_loss
|
||||
- terrain_loss - building_loss - veg_loss
|
||||
- rain_loss - indoor_loss - atmo_loss
|
||||
+ reflection_gain)
|
||||
+ reflection_gain
|
||||
- settings.fading_margin)
|
||||
|
||||
return CoveragePoint(
|
||||
lat=lat, lon=lon, rsrp=rsrp, distance=distance,
|
||||
@@ -1508,7 +1512,8 @@ class CoverageService:
|
||||
)
|
||||
|
||||
rsrp = (site.power + site.gain - path_loss
|
||||
- antenna_loss - terrain_loss)
|
||||
- antenna_loss - terrain_loss
|
||||
- settings.fading_margin)
|
||||
|
||||
if rsrp >= settings.min_signal:
|
||||
points.append(CoveragePoint(
|
||||
|
||||
192
backend/app/services/gpu_backend.py
Normal file
192
backend/app/services/gpu_backend.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""
|
||||
GPU Backend Manager — detects and manages compute backends.
|
||||
|
||||
Supports:
|
||||
- CUDA via CuPy
|
||||
- OpenCL via PyOpenCL (future)
|
||||
- CPU via NumPy (always available)
|
||||
|
||||
Usage:
|
||||
from app.services.gpu_backend import gpu_manager
|
||||
xp = gpu_manager.get_array_module() # cupy or numpy
|
||||
status = gpu_manager.get_status()
|
||||
"""
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GPUBackend(str, Enum):
    """Compute backend identifiers; str-valued so they serialize directly to JSON."""

    CUDA = "cuda"      # NVIDIA GPUs via CuPy
    OPENCL = "opencl"  # OpenCL devices via PyOpenCL (detection only for now)
    CPU = "cpu"        # NumPy fallback, always available
|
||||
|
||||
|
||||
@dataclass
class GPUDevice:
    """A single detected compute device."""

    # Which backend exposes this device.
    backend: GPUBackend
    # Zero-based ordinal within that backend.
    index: int
    # Human-readable device name (decoded from device properties when needed).
    name: str
    # Total device memory in MB; 0 for the CPU pseudo-device.
    memory_mb: int
    # Backend-specific details, e.g. {"cuda_version": ...} or {"platform": ...}.
    extra: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class GPUManager:
    """Singleton GPU manager with device detection and selection.

    Probes CUDA (CuPy) and OpenCL (PyOpenCL) once at construction time,
    always registers a CPU/NumPy fallback device, and exposes the active
    backend's array module for vectorized computation.
    """

    def __init__(self):
        self._devices: list[GPUDevice] = []
        self._active_backend: GPUBackend = GPUBackend.CPU
        self._active_device: Optional[GPUDevice] = None
        # CuPy module handle; set only when at least one CUDA device exists.
        self._cupy = None
        self._detect_devices()

    def _detect_devices(self):
        """Probe available GPU backends and auto-select the best one."""
        # The CPU pseudo-device is always registered so selection can never fail.
        cpu_device = GPUDevice(
            backend=GPUBackend.CPU,
            index=0,
            name="CPU (NumPy)",
            memory_mb=0,
        )
        self._devices.append(cpu_device)

        self._probe_cuda()
        self._probe_opencl()

        # Auto-select best: prefer CUDA > CPU (OpenCL is detection-only).
        cuda_devices = [d for d in self._devices if d.backend == GPUBackend.CUDA]
        if cuda_devices:
            self._active_backend = GPUBackend.CUDA
            self._active_device = cuda_devices[0]
            logger.info("[GPU] Active backend: CUDA — %s", self._active_device.name)
        else:
            self._active_backend = GPUBackend.CPU
            self._active_device = cpu_device
            logger.info("[GPU] Active backend: CPU (NumPy)")

    def _probe_cuda(self):
        """Register CUDA devices via CuPy, when installed and functional."""
        try:
            import cupy as cp
            device_count = cp.cuda.runtime.getDeviceCount()
            # Runtime version is device-independent — query once, not per device.
            cuda_ver = cp.cuda.runtime.runtimeGetVersion() if device_count > 0 else None
            for i in range(device_count):
                props = cp.cuda.runtime.getDeviceProperties(i)
                name = props["name"]
                if isinstance(name, bytes):
                    name = name.decode()
                mem_mb = props["totalGlobalMem"] // (1024 * 1024)
                self._devices.append(GPUDevice(
                    backend=GPUBackend.CUDA,
                    index=i,
                    name=str(name),
                    memory_mb=mem_mb,
                    extra={"cuda_version": cuda_ver},
                ))
                logger.info("[GPU] CUDA device %s: %s (%s MB)", i, name, mem_mb)
            if device_count > 0:
                self._cupy = cp
        except ImportError:
            logger.info("[GPU] CuPy not installed — CUDA unavailable")
        except Exception as e:
            logger.warning("[GPU] CuPy probe error: %s", e)

    def _probe_opencl(self):
        """Register OpenCL devices via PyOpenCL (detection stub; no compute path)."""
        try:
            import pyopencl as cl
            for plat in cl.get_platforms():
                for dev in plat.get_devices():
                    mem_mb = dev.global_mem_size // (1024 * 1024)
                    # Per-backend ordinal: count previously registered OpenCL devices.
                    ocl_index = len(
                        [d for d in self._devices if d.backend == GPUBackend.OPENCL]
                    )
                    device = GPUDevice(
                        backend=GPUBackend.OPENCL,
                        index=ocl_index,
                        name=dev.name.strip(),
                        memory_mb=mem_mb,
                        extra={"platform": plat.name.strip()},
                    )
                    self._devices.append(device)
                    logger.info("[GPU] OpenCL device: %s (%s MB)", device.name, mem_mb)
        except ImportError:
            pass
        except Exception as e:
            logger.debug("[GPU] OpenCL probe error: %s", e)

    @property
    def gpu_available(self) -> bool:
        """True when the active backend is an accelerator (anything but CPU)."""
        return self._active_backend != GPUBackend.CPU

    def get_array_module(self) -> Any:
        """Return cupy (if CUDA active) or numpy."""
        if self._active_backend == GPUBackend.CUDA and self._cupy is not None:
            return self._cupy
        return np

    def to_cpu(self, arr: Any) -> np.ndarray:
        """Transfer an array to host memory as a numpy array."""
        # CuPy arrays expose .get() for device-to-host copies; numpy arrays do not.
        if hasattr(arr, 'get'):
            return arr.get()
        return np.asarray(arr)

    @staticmethod
    def _device_dict(d: GPUDevice) -> dict:
        """Serialize one device for the API (shared by get_status/get_devices)."""
        return {
            "backend": d.backend.value,
            "index": d.index,
            "name": d.name,
            "memory_mb": d.memory_mb,
        }

    def get_status(self) -> dict:
        """Full status dict for API: active backend/device plus the device list."""
        active = (
            self._device_dict(self._active_device)
            if self._active_device else None
        )
        return {
            "active_backend": self._active_backend.value,
            "active_device": active,
            "gpu_available": self.gpu_available,
            "available_devices": self.get_devices(),
        }

    def get_devices(self) -> list[dict]:
        """Device list for API."""
        return [self._device_dict(d) for d in self._devices]

    def set_device(self, backend: str, index: int = 0) -> dict:
        """Switch active compute device.

        Raises ValueError when `backend` is not a valid GPUBackend value or
        no detected device matches (backend, index).
        """
        target_backend = GPUBackend(backend)  # ValueError on unknown backend name
        candidates = [d for d in self._devices
                      if d.backend == target_backend and d.index == index]
        if not candidates:
            raise ValueError(f"No device found: backend={backend}, index={index}")

        # Activate the CUDA device BEFORE committing state, so a failure here
        # leaves the manager in its previous, consistent configuration.
        if target_backend == GPUBackend.CUDA and self._cupy is not None:
            self._cupy.cuda.Device(index).use()

        self._active_device = candidates[0]
        self._active_backend = target_backend

        logger.info("[GPU] Switched to: %s (%s)",
                    self._active_device.name, target_backend.value)
        return {
            "backend": self._active_backend.value,
            "device": self._active_device.name,
        }
|
||||
|
||||
|
||||
# Module-level singleton: device detection runs once at import time and all
# consumers (API routes, compute services) share this instance.
gpu_manager = GPUManager()
|
||||
@@ -3,7 +3,7 @@ GPU-accelerated computation service using CuPy.
|
||||
Falls back to NumPy when CuPy/CUDA is not available.
|
||||
|
||||
Provides vectorized batch operations for coverage calculation:
|
||||
- Haversine distance (site → all grid points)
|
||||
- Haversine distance (site -> all grid points)
|
||||
- Okumura-Hata path loss (all distances at once)
|
||||
|
||||
Usage:
|
||||
@@ -11,48 +11,29 @@ Usage:
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Dict, Any
|
||||
|
||||
# ── Try CuPy import ──
|
||||
|
||||
GPU_AVAILABLE = False
|
||||
GPU_INFO: Optional[Dict[str, Any]] = None
|
||||
cp = None
|
||||
|
||||
try:
|
||||
import cupy as _cp
|
||||
device_count = _cp.cuda.runtime.getDeviceCount()
|
||||
if device_count > 0:
|
||||
cp = _cp
|
||||
GPU_AVAILABLE = True
|
||||
props = _cp.cuda.runtime.getDeviceProperties(0)
|
||||
GPU_INFO = {
|
||||
"name": props["name"].decode() if isinstance(props["name"], bytes) else str(props["name"]),
|
||||
"memory_mb": props["totalGlobalMem"] // (1024 * 1024),
|
||||
"cuda_version": _cp.cuda.runtime.runtimeGetVersion(),
|
||||
}
|
||||
print(f"[GPU] CUDA available: {GPU_INFO['name']} ({GPU_INFO['memory_mb']} MB)", flush=True)
|
||||
else:
|
||||
print("[GPU] No CUDA devices found", flush=True)
|
||||
except ImportError:
|
||||
print("[GPU] CuPy not installed — using CPU/NumPy", flush=True)
|
||||
print("[GPU] To enable GPU acceleration, install CuPy:", flush=True)
|
||||
print("[GPU] For CUDA 12.x: pip install cupy-cuda12x", flush=True)
|
||||
print("[GPU] For CUDA 11.x: pip install cupy-cuda11x", flush=True)
|
||||
print("[GPU] Check CUDA version: nvidia-smi", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[GPU] CuPy error: {e} — GPU acceleration disabled", flush=True)
|
||||
from app.services.gpu_backend import gpu_manager
|
||||
|
||||
# Backward-compatible exports
|
||||
GPU_AVAILABLE = gpu_manager.gpu_available
|
||||
GPU_INFO: Dict[str, Any] | None = (
|
||||
{
|
||||
"name": gpu_manager._active_device.name,
|
||||
"memory_mb": gpu_manager._active_device.memory_mb,
|
||||
**gpu_manager._active_device.extra,
|
||||
}
|
||||
if gpu_manager.gpu_available and gpu_manager._active_device
|
||||
else None
|
||||
)
|
||||
|
||||
# Array module: cupy on GPU, numpy on CPU
|
||||
xp = cp if GPU_AVAILABLE else np
|
||||
xp = gpu_manager.get_array_module()
|
||||
|
||||
|
||||
def _to_cpu(arr):
|
||||
"""Transfer array to CPU numpy if on GPU."""
|
||||
if GPU_AVAILABLE and hasattr(arr, 'get'):
|
||||
return arr.get()
|
||||
return np.asarray(arr)
|
||||
return gpu_manager.to_cpu(arr)
|
||||
|
||||
|
||||
class GPUService:
|
||||
@@ -60,13 +41,13 @@ class GPUService:
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return GPU_AVAILABLE
|
||||
return gpu_manager.gpu_available
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
"""Return GPU info dict for system endpoint."""
|
||||
if not GPU_AVAILABLE:
|
||||
if not gpu_manager.gpu_available:
|
||||
return {"available": False, "name": None, "memory_mb": None}
|
||||
return {"available": True, **GPU_INFO}
|
||||
return {"available": True, **(GPU_INFO or {})}
|
||||
|
||||
def precompute_distances(
|
||||
self,
|
||||
@@ -79,16 +60,17 @@ class GPUService:
|
||||
|
||||
Returns distances in meters as a CPU numpy array.
|
||||
"""
|
||||
lat1 = xp.radians(xp.asarray(grid_lats, dtype=xp.float64))
|
||||
lon1 = xp.radians(xp.asarray(grid_lons, dtype=xp.float64))
|
||||
lat2 = xp.radians(xp.float64(site_lat))
|
||||
lon2 = xp.radians(xp.float64(site_lon))
|
||||
_xp = gpu_manager.get_array_module()
|
||||
lat1 = _xp.radians(_xp.asarray(grid_lats, dtype=_xp.float64))
|
||||
lon1 = _xp.radians(_xp.asarray(grid_lons, dtype=_xp.float64))
|
||||
lat2 = _xp.radians(_xp.float64(site_lat))
|
||||
lon2 = _xp.radians(_xp.float64(site_lon))
|
||||
|
||||
dlat = lat2 - lat1
|
||||
dlon = lon2 - lon1
|
||||
|
||||
a = xp.sin(dlat / 2) ** 2 + xp.cos(lat1) * xp.cos(lat2) * xp.sin(dlon / 2) ** 2
|
||||
c = 2 * xp.arcsin(xp.sqrt(a))
|
||||
a = _xp.sin(dlat / 2) ** 2 + _xp.cos(lat1) * _xp.cos(lat2) * _xp.sin(dlon / 2) ** 2
|
||||
c = 2 * _xp.arcsin(_xp.sqrt(a))
|
||||
|
||||
distances = 6371000.0 * c
|
||||
return _to_cpu(distances)
|
||||
@@ -108,40 +90,41 @@ class GPUService:
|
||||
|
||||
Returns path loss in dB as a CPU numpy array.
|
||||
"""
|
||||
d_arr = xp.asarray(distances, dtype=xp.float64)
|
||||
d_km = xp.maximum(d_arr / 1000.0, 0.1)
|
||||
_xp = gpu_manager.get_array_module()
|
||||
d_arr = _xp.asarray(distances, dtype=_xp.float64)
|
||||
d_km = _xp.maximum(d_arr / 1000.0, 0.1)
|
||||
|
||||
freq = float(frequency_mhz)
|
||||
h_tx = max(float(tx_height), 1.0)
|
||||
h_rx = max(float(rx_height), 1.0)
|
||||
|
||||
log_f = xp.log10(xp.float64(freq))
|
||||
log_hb = xp.log10(xp.float64(max(h_tx, 1.0)))
|
||||
log_f = _xp.log10(_xp.float64(freq))
|
||||
log_hb = _xp.log10(_xp.float64(max(h_tx, 1.0)))
|
||||
|
||||
if freq > 2000:
|
||||
# Free-Space Path Loss: FSPL = 20*log10(d_km) + 20*log10(f) + 32.45
|
||||
L = 20.0 * xp.log10(d_km) + 20.0 * log_f + 32.45
|
||||
L = 20.0 * _xp.log10(d_km) + 20.0 * log_f + 32.45
|
||||
|
||||
elif freq > 1500:
|
||||
# COST-231 Hata: extends Okumura-Hata to 1500-2000 MHz
|
||||
a_hm = (1.1 * log_f - 0.7) * h_rx - (1.56 * log_f - 0.8)
|
||||
L = (46.3 + 33.9 * log_f - 13.82 * log_hb - a_hm
|
||||
+ (44.9 - 6.55 * log_hb) * xp.log10(d_km))
|
||||
+ (44.9 - 6.55 * log_hb) * _xp.log10(d_km))
|
||||
if environment == "urban":
|
||||
L += 3.0 # Metropolitan center correction
|
||||
|
||||
elif freq >= 150:
|
||||
# Okumura-Hata: 150-1500 MHz
|
||||
if environment == "urban" and freq >= 400:
|
||||
a_hm = 3.2 * (xp.log10(11.75 * h_rx) ** 2) - 4.97
|
||||
a_hm = 3.2 * (_xp.log10(11.75 * h_rx) ** 2) - 4.97
|
||||
else:
|
||||
a_hm = (1.1 * log_f - 0.7) * h_rx - (1.56 * log_f - 0.8)
|
||||
|
||||
L_urban = (69.55 + 26.16 * log_f - 13.82 * log_hb - a_hm
|
||||
+ (44.9 - 6.55 * log_hb) * xp.log10(d_km))
|
||||
+ (44.9 - 6.55 * log_hb) * _xp.log10(d_km))
|
||||
|
||||
if environment == "suburban":
|
||||
L = L_urban - 2 * (xp.log10(freq / 28) ** 2) - 5.4
|
||||
L = L_urban - 2 * (_xp.log10(freq / 28) ** 2) - 5.4
|
||||
elif environment == "rural":
|
||||
L = L_urban - 4.78 * (log_f ** 2) + 18.33 * log_f - 35.94
|
||||
elif environment == "open":
|
||||
@@ -152,7 +135,7 @@ class GPUService:
|
||||
else:
|
||||
# Very low frequency — Longley-Rice simplified (area mode)
|
||||
# Use FSPL as baseline with terrain roughness correction
|
||||
L = 20.0 * xp.log10(d_km) + 20.0 * log_f + 32.45 + 10.0
|
||||
L = 20.0 * _xp.log10(d_km) + 20.0 * log_f + 32.45 + 10.0
|
||||
|
||||
return _to_cpu(L)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user