@mytec: iter3.5.0 ready for testing
This commit is contained in:
35
backend/app/api/routes/gpu.py
Normal file
35
backend/app/api/routes/gpu.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""GPU management API endpoints."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from app.services.gpu_backend import gpu_manager
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
class SetDeviceRequest(BaseModel):
    """Request body for POST /set: selects a compute backend and device index."""

    # Backend identifier string ("cuda", "opencl", "cpu"); gpu_manager.set_device
    # validates it and raises ValueError for unknown values.
    backend: str
    # Zero-based device ordinal within the chosen backend; defaults to the first.
    index: int = 0
|
||||
|
||||
|
||||
@router.get("/status")
async def gpu_status():
    """Report GPU manager state: active backend, active device, available devices."""
    status = gpu_manager.get_status()
    return status
|
||||
|
||||
|
||||
@router.get("/devices")
async def gpu_devices():
    """List every compute device the backend manager detected."""
    devices = gpu_manager.get_devices()
    return {"devices": devices}
|
||||
|
||||
|
||||
@router.post("/set")
async def gpu_set_device(request: SetDeviceRequest):
    """Switch the active compute device.

    Body: ``{"backend": "cuda"|"opencl"|"cpu", "index": 0}``.
    Returns ``{"status": "ok", ...}`` on success; responds 400 when the
    (backend, index) pair does not match any detected device.
    """
    # Keep the try body minimal: only the call that can raise ValueError.
    try:
        result = gpu_manager.set_device(request.backend, request.index)
    except ValueError as e:
        # Chain the cause so server tracebacks keep the root error (B904).
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"status": "ok", **result}
|
||||
@@ -4,7 +4,7 @@ from fastapi import FastAPI, WebSocket
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.core.database import connect_to_mongo, close_mongo_connection
|
||||
from app.api.routes import health, projects, terrain, coverage, regions, system
|
||||
from app.api.routes import health, projects, terrain, coverage, regions, system, gpu
|
||||
from app.api.websocket import websocket_endpoint
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@ app.include_router(terrain.router, prefix="/api/terrain", tags=["terrain"])
|
||||
app.include_router(coverage.router, prefix="/api/coverage", tags=["coverage"])
|
||||
app.include_router(regions.router, prefix="/api/regions", tags=["regions"])
|
||||
app.include_router(system.router, prefix="/api/system", tags=["system"])
|
||||
app.include_router(gpu.router, prefix="/api/gpu", tags=["gpu"])
|
||||
|
||||
# WebSocket endpoint for real-time coverage with progress
|
||||
app.websocket("/ws")(websocket_endpoint)
|
||||
|
||||
@@ -247,6 +247,9 @@ class CoverageSettings(BaseModel):
|
||||
temperature_c: float = 15.0
|
||||
humidity_percent: float = 50.0
|
||||
|
||||
# Fading margin (dB) — additional safety loss subtracted from RSRP
|
||||
fading_margin: float = 0.0
|
||||
|
||||
# Preset
|
||||
preset: Optional[str] = None # fast, standard, detailed, full
|
||||
|
||||
@@ -1362,7 +1365,8 @@ class CoverageService:
|
||||
rsrp = (site.power + site.gain - path_loss - antenna_loss
|
||||
- terrain_loss - building_loss - veg_loss
|
||||
- rain_loss - indoor_loss - atmo_loss
|
||||
+ reflection_gain)
|
||||
+ reflection_gain
|
||||
- settings.fading_margin)
|
||||
|
||||
return CoveragePoint(
|
||||
lat=lat, lon=lon, rsrp=rsrp, distance=distance,
|
||||
@@ -1508,7 +1512,8 @@ class CoverageService:
|
||||
)
|
||||
|
||||
rsrp = (site.power + site.gain - path_loss
|
||||
- antenna_loss - terrain_loss)
|
||||
- antenna_loss - terrain_loss
|
||||
- settings.fading_margin)
|
||||
|
||||
if rsrp >= settings.min_signal:
|
||||
points.append(CoveragePoint(
|
||||
|
||||
192
backend/app/services/gpu_backend.py
Normal file
192
backend/app/services/gpu_backend.py
Normal file
@@ -0,0 +1,192 @@
|
||||
"""
|
||||
GPU Backend Manager — detects and manages compute backends.
|
||||
|
||||
Supports:
|
||||
- CUDA via CuPy
|
||||
- OpenCL via PyOpenCL (future)
|
||||
- CPU via NumPy (always available)
|
||||
|
||||
Usage:
|
||||
from app.services.gpu_backend import gpu_manager
|
||||
xp = gpu_manager.get_array_module() # cupy or numpy
|
||||
status = gpu_manager.get_status()
|
||||
"""
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GPUBackend(str, Enum):
    """Compute backend identifiers; str-valued so they serialize directly to JSON."""

    CUDA = "cuda"      # NVIDIA GPUs via CuPy
    OPENCL = "opencl"  # OpenCL devices via PyOpenCL (detection only for now)
    CPU = "cpu"        # NumPy fallback, always available
|
||||
|
||||
|
||||
@dataclass
class GPUDevice:
    """A single detected compute device."""

    # Which backend exposes this device.
    backend: GPUBackend
    # Zero-based ordinal within that backend.
    index: int
    # Human-readable device name (decoded from device properties when needed).
    name: str
    # Total device memory in MB; 0 for the CPU pseudo-device.
    memory_mb: int
    # Backend-specific details, e.g. {"cuda_version": ...} or {"platform": ...}.
    extra: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class GPUManager:
    """Singleton GPU manager with device detection and selection.

    Probes CUDA (CuPy) and OpenCL (PyOpenCL) once at construction time,
    always registers a CPU/NumPy fallback device, and exposes the active
    backend's array module for vectorized computation.
    """

    def __init__(self):
        self._devices: list[GPUDevice] = []
        self._active_backend: GPUBackend = GPUBackend.CPU
        self._active_device: Optional[GPUDevice] = None
        # CuPy module handle; set only when at least one CUDA device exists.
        self._cupy = None
        self._detect_devices()

    def _detect_devices(self):
        """Probe available GPU backends and auto-select the best one."""
        # The CPU pseudo-device is always registered so selection can never fail.
        cpu_device = GPUDevice(
            backend=GPUBackend.CPU,
            index=0,
            name="CPU (NumPy)",
            memory_mb=0,
        )
        self._devices.append(cpu_device)

        self._probe_cuda()
        self._probe_opencl()

        # Auto-select best: prefer CUDA > CPU (OpenCL is detection-only).
        cuda_devices = [d for d in self._devices if d.backend == GPUBackend.CUDA]
        if cuda_devices:
            self._active_backend = GPUBackend.CUDA
            self._active_device = cuda_devices[0]
            logger.info("[GPU] Active backend: CUDA — %s", self._active_device.name)
        else:
            self._active_backend = GPUBackend.CPU
            self._active_device = cpu_device
            logger.info("[GPU] Active backend: CPU (NumPy)")

    def _probe_cuda(self):
        """Register CUDA devices via CuPy, when installed and functional."""
        try:
            import cupy as cp
            device_count = cp.cuda.runtime.getDeviceCount()
            # Runtime version is device-independent — query once, not per device.
            cuda_ver = cp.cuda.runtime.runtimeGetVersion() if device_count > 0 else None
            for i in range(device_count):
                props = cp.cuda.runtime.getDeviceProperties(i)
                name = props["name"]
                if isinstance(name, bytes):
                    name = name.decode()
                mem_mb = props["totalGlobalMem"] // (1024 * 1024)
                self._devices.append(GPUDevice(
                    backend=GPUBackend.CUDA,
                    index=i,
                    name=str(name),
                    memory_mb=mem_mb,
                    extra={"cuda_version": cuda_ver},
                ))
                logger.info("[GPU] CUDA device %s: %s (%s MB)", i, name, mem_mb)
            if device_count > 0:
                self._cupy = cp
        except ImportError:
            logger.info("[GPU] CuPy not installed — CUDA unavailable")
        except Exception as e:
            logger.warning("[GPU] CuPy probe error: %s", e)

    def _probe_opencl(self):
        """Register OpenCL devices via PyOpenCL (detection stub; no compute path)."""
        try:
            import pyopencl as cl
            for plat in cl.get_platforms():
                for dev in plat.get_devices():
                    mem_mb = dev.global_mem_size // (1024 * 1024)
                    # Per-backend ordinal: count previously registered OpenCL devices.
                    ocl_index = len(
                        [d for d in self._devices if d.backend == GPUBackend.OPENCL]
                    )
                    device = GPUDevice(
                        backend=GPUBackend.OPENCL,
                        index=ocl_index,
                        name=dev.name.strip(),
                        memory_mb=mem_mb,
                        extra={"platform": plat.name.strip()},
                    )
                    self._devices.append(device)
                    logger.info("[GPU] OpenCL device: %s (%s MB)", device.name, mem_mb)
        except ImportError:
            pass
        except Exception as e:
            logger.debug("[GPU] OpenCL probe error: %s", e)

    @property
    def gpu_available(self) -> bool:
        """True when the active backend is an accelerator (anything but CPU)."""
        return self._active_backend != GPUBackend.CPU

    def get_array_module(self) -> Any:
        """Return cupy (if CUDA active) or numpy."""
        if self._active_backend == GPUBackend.CUDA and self._cupy is not None:
            return self._cupy
        return np

    def to_cpu(self, arr: Any) -> np.ndarray:
        """Transfer an array to host memory as a numpy array."""
        # CuPy arrays expose .get() for device-to-host copies; numpy arrays do not.
        if hasattr(arr, 'get'):
            return arr.get()
        return np.asarray(arr)

    @staticmethod
    def _device_dict(d: GPUDevice) -> dict:
        """Serialize one device for the API (shared by get_status/get_devices)."""
        return {
            "backend": d.backend.value,
            "index": d.index,
            "name": d.name,
            "memory_mb": d.memory_mb,
        }

    def get_status(self) -> dict:
        """Full status dict for API: active backend/device plus the device list."""
        active = (
            self._device_dict(self._active_device)
            if self._active_device else None
        )
        return {
            "active_backend": self._active_backend.value,
            "active_device": active,
            "gpu_available": self.gpu_available,
            "available_devices": self.get_devices(),
        }

    def get_devices(self) -> list[dict]:
        """Device list for API."""
        return [self._device_dict(d) for d in self._devices]

    def set_device(self, backend: str, index: int = 0) -> dict:
        """Switch active compute device.

        Raises ValueError when `backend` is not a valid GPUBackend value or
        no detected device matches (backend, index).
        """
        target_backend = GPUBackend(backend)  # ValueError on unknown backend name
        candidates = [d for d in self._devices
                      if d.backend == target_backend and d.index == index]
        if not candidates:
            raise ValueError(f"No device found: backend={backend}, index={index}")

        # Activate the CUDA device BEFORE committing state, so a failure here
        # leaves the manager in its previous, consistent configuration.
        if target_backend == GPUBackend.CUDA and self._cupy is not None:
            self._cupy.cuda.Device(index).use()

        self._active_device = candidates[0]
        self._active_backend = target_backend

        logger.info("[GPU] Switched to: %s (%s)",
                    self._active_device.name, target_backend.value)
        return {
            "backend": self._active_backend.value,
            "device": self._active_device.name,
        }
|
||||
|
||||
|
||||
# Module-level singleton: device detection runs once at import time and all
# consumers (API routes, compute services) share this instance.
gpu_manager = GPUManager()
|
||||
@@ -3,7 +3,7 @@ GPU-accelerated computation service using CuPy.
|
||||
Falls back to NumPy when CuPy/CUDA is not available.
|
||||
|
||||
Provides vectorized batch operations for coverage calculation:
|
||||
- Haversine distance (site → all grid points)
|
||||
- Haversine distance (site -> all grid points)
|
||||
- Okumura-Hata path loss (all distances at once)
|
||||
|
||||
Usage:
|
||||
@@ -11,48 +11,29 @@ Usage:
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Dict, Any
|
||||
|
||||
# ── Try CuPy import ──
|
||||
|
||||
GPU_AVAILABLE = False
|
||||
GPU_INFO: Optional[Dict[str, Any]] = None
|
||||
cp = None
|
||||
|
||||
try:
|
||||
import cupy as _cp
|
||||
device_count = _cp.cuda.runtime.getDeviceCount()
|
||||
if device_count > 0:
|
||||
cp = _cp
|
||||
GPU_AVAILABLE = True
|
||||
props = _cp.cuda.runtime.getDeviceProperties(0)
|
||||
GPU_INFO = {
|
||||
"name": props["name"].decode() if isinstance(props["name"], bytes) else str(props["name"]),
|
||||
"memory_mb": props["totalGlobalMem"] // (1024 * 1024),
|
||||
"cuda_version": _cp.cuda.runtime.runtimeGetVersion(),
|
||||
}
|
||||
print(f"[GPU] CUDA available: {GPU_INFO['name']} ({GPU_INFO['memory_mb']} MB)", flush=True)
|
||||
else:
|
||||
print("[GPU] No CUDA devices found", flush=True)
|
||||
except ImportError:
|
||||
print("[GPU] CuPy not installed — using CPU/NumPy", flush=True)
|
||||
print("[GPU] To enable GPU acceleration, install CuPy:", flush=True)
|
||||
print("[GPU] For CUDA 12.x: pip install cupy-cuda12x", flush=True)
|
||||
print("[GPU] For CUDA 11.x: pip install cupy-cuda11x", flush=True)
|
||||
print("[GPU] Check CUDA version: nvidia-smi", flush=True)
|
||||
except Exception as e:
|
||||
print(f"[GPU] CuPy error: {e} — GPU acceleration disabled", flush=True)
|
||||
from app.services.gpu_backend import gpu_manager
|
||||
|
||||
# Backward-compatible exports
|
||||
GPU_AVAILABLE = gpu_manager.gpu_available
|
||||
GPU_INFO: Dict[str, Any] | None = (
|
||||
{
|
||||
"name": gpu_manager._active_device.name,
|
||||
"memory_mb": gpu_manager._active_device.memory_mb,
|
||||
**gpu_manager._active_device.extra,
|
||||
}
|
||||
if gpu_manager.gpu_available and gpu_manager._active_device
|
||||
else None
|
||||
)
|
||||
|
||||
# Array module: cupy on GPU, numpy on CPU
|
||||
xp = cp if GPU_AVAILABLE else np
|
||||
xp = gpu_manager.get_array_module()
|
||||
|
||||
|
||||
def _to_cpu(arr):
|
||||
"""Transfer array to CPU numpy if on GPU."""
|
||||
if GPU_AVAILABLE and hasattr(arr, 'get'):
|
||||
return arr.get()
|
||||
return np.asarray(arr)
|
||||
return gpu_manager.to_cpu(arr)
|
||||
|
||||
|
||||
class GPUService:
|
||||
@@ -60,13 +41,13 @@ class GPUService:
|
||||
|
||||
@property
|
||||
def available(self) -> bool:
|
||||
return GPU_AVAILABLE
|
||||
return gpu_manager.gpu_available
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
"""Return GPU info dict for system endpoint."""
|
||||
if not GPU_AVAILABLE:
|
||||
if not gpu_manager.gpu_available:
|
||||
return {"available": False, "name": None, "memory_mb": None}
|
||||
return {"available": True, **GPU_INFO}
|
||||
return {"available": True, **(GPU_INFO or {})}
|
||||
|
||||
def precompute_distances(
|
||||
self,
|
||||
@@ -79,16 +60,17 @@ class GPUService:
|
||||
|
||||
Returns distances in meters as a CPU numpy array.
|
||||
"""
|
||||
lat1 = xp.radians(xp.asarray(grid_lats, dtype=xp.float64))
|
||||
lon1 = xp.radians(xp.asarray(grid_lons, dtype=xp.float64))
|
||||
lat2 = xp.radians(xp.float64(site_lat))
|
||||
lon2 = xp.radians(xp.float64(site_lon))
|
||||
_xp = gpu_manager.get_array_module()
|
||||
lat1 = _xp.radians(_xp.asarray(grid_lats, dtype=_xp.float64))
|
||||
lon1 = _xp.radians(_xp.asarray(grid_lons, dtype=_xp.float64))
|
||||
lat2 = _xp.radians(_xp.float64(site_lat))
|
||||
lon2 = _xp.radians(_xp.float64(site_lon))
|
||||
|
||||
dlat = lat2 - lat1
|
||||
dlon = lon2 - lon1
|
||||
|
||||
a = xp.sin(dlat / 2) ** 2 + xp.cos(lat1) * xp.cos(lat2) * xp.sin(dlon / 2) ** 2
|
||||
c = 2 * xp.arcsin(xp.sqrt(a))
|
||||
a = _xp.sin(dlat / 2) ** 2 + _xp.cos(lat1) * _xp.cos(lat2) * _xp.sin(dlon / 2) ** 2
|
||||
c = 2 * _xp.arcsin(_xp.sqrt(a))
|
||||
|
||||
distances = 6371000.0 * c
|
||||
return _to_cpu(distances)
|
||||
@@ -108,40 +90,41 @@ class GPUService:
|
||||
|
||||
Returns path loss in dB as a CPU numpy array.
|
||||
"""
|
||||
d_arr = xp.asarray(distances, dtype=xp.float64)
|
||||
d_km = xp.maximum(d_arr / 1000.0, 0.1)
|
||||
_xp = gpu_manager.get_array_module()
|
||||
d_arr = _xp.asarray(distances, dtype=_xp.float64)
|
||||
d_km = _xp.maximum(d_arr / 1000.0, 0.1)
|
||||
|
||||
freq = float(frequency_mhz)
|
||||
h_tx = max(float(tx_height), 1.0)
|
||||
h_rx = max(float(rx_height), 1.0)
|
||||
|
||||
log_f = xp.log10(xp.float64(freq))
|
||||
log_hb = xp.log10(xp.float64(max(h_tx, 1.0)))
|
||||
log_f = _xp.log10(_xp.float64(freq))
|
||||
log_hb = _xp.log10(_xp.float64(max(h_tx, 1.0)))
|
||||
|
||||
if freq > 2000:
|
||||
# Free-Space Path Loss: FSPL = 20*log10(d_km) + 20*log10(f) + 32.45
|
||||
L = 20.0 * xp.log10(d_km) + 20.0 * log_f + 32.45
|
||||
L = 20.0 * _xp.log10(d_km) + 20.0 * log_f + 32.45
|
||||
|
||||
elif freq > 1500:
|
||||
# COST-231 Hata: extends Okumura-Hata to 1500-2000 MHz
|
||||
a_hm = (1.1 * log_f - 0.7) * h_rx - (1.56 * log_f - 0.8)
|
||||
L = (46.3 + 33.9 * log_f - 13.82 * log_hb - a_hm
|
||||
+ (44.9 - 6.55 * log_hb) * xp.log10(d_km))
|
||||
+ (44.9 - 6.55 * log_hb) * _xp.log10(d_km))
|
||||
if environment == "urban":
|
||||
L += 3.0 # Metropolitan center correction
|
||||
|
||||
elif freq >= 150:
|
||||
# Okumura-Hata: 150-1500 MHz
|
||||
if environment == "urban" and freq >= 400:
|
||||
a_hm = 3.2 * (xp.log10(11.75 * h_rx) ** 2) - 4.97
|
||||
a_hm = 3.2 * (_xp.log10(11.75 * h_rx) ** 2) - 4.97
|
||||
else:
|
||||
a_hm = (1.1 * log_f - 0.7) * h_rx - (1.56 * log_f - 0.8)
|
||||
|
||||
L_urban = (69.55 + 26.16 * log_f - 13.82 * log_hb - a_hm
|
||||
+ (44.9 - 6.55 * log_hb) * xp.log10(d_km))
|
||||
+ (44.9 - 6.55 * log_hb) * _xp.log10(d_km))
|
||||
|
||||
if environment == "suburban":
|
||||
L = L_urban - 2 * (xp.log10(freq / 28) ** 2) - 5.4
|
||||
L = L_urban - 2 * (_xp.log10(freq / 28) ** 2) - 5.4
|
||||
elif environment == "rural":
|
||||
L = L_urban - 4.78 * (log_f ** 2) + 18.33 * log_f - 35.94
|
||||
elif environment == "open":
|
||||
@@ -152,7 +135,7 @@ class GPUService:
|
||||
else:
|
||||
# Very low frequency — Longley-Rice simplified (area mode)
|
||||
# Use FSPL as baseline with terrain roughness correction
|
||||
L = 20.0 * xp.log10(d_km) + 20.0 * log_f + 32.45 + 10.0
|
||||
L = 20.0 * _xp.log10(d_km) + 20.0 * log_f + 32.45 + 10.0
|
||||
|
||||
return _to_cpu(L)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user