""" GPU-accelerated computation service using CuPy. Falls back to NumPy when CuPy/CUDA is not available. Provides vectorized batch operations for coverage calculation: - Haversine distance (site -> all grid points) - Okumura-Hata path loss (all distances at once) Usage: from app.services.gpu_service import gpu_service, GPU_AVAILABLE """ import numpy as np from typing import Dict, Any from app.services.gpu_backend import gpu_manager # Backward-compatible exports GPU_AVAILABLE = gpu_manager.gpu_available GPU_INFO: Dict[str, Any] | None = ( { "name": gpu_manager._active_device.name, "memory_mb": gpu_manager._active_device.memory_mb, **gpu_manager._active_device.extra, } if gpu_manager.gpu_available and gpu_manager._active_device else None ) # Array module: cupy on GPU, numpy on CPU xp = gpu_manager.get_array_module() def _to_cpu(arr): """Transfer array to CPU numpy if on GPU.""" return gpu_manager.to_cpu(arr) class GPUService: """GPU-accelerated batch operations for coverage calculation.""" @property def available(self) -> bool: return gpu_manager.gpu_available def get_info(self) -> Dict[str, Any]: """Return GPU info dict for system endpoint.""" if not gpu_manager.gpu_available: return {"available": False, "name": None, "memory_mb": None} return {"available": True, **(GPU_INFO or {})} def precompute_distances( self, grid_lats: np.ndarray, grid_lons: np.ndarray, site_lat: float, site_lon: float, ) -> np.ndarray: """Vectorized haversine distance from site to all grid points. Returns distances in meters as a CPU numpy array. """ _xp = gpu_manager.get_array_module() lat1 = _xp.radians(_xp.asarray(grid_lats, dtype=_xp.float64)) lon1 = _xp.radians(_xp.asarray(grid_lons, dtype=_xp.float64)) lat2 = _xp.radians(_xp.float64(site_lat)) lon2 = _xp.radians(_xp.float64(site_lon)) dlat = lat2 - lat1 dlon = lon2 - lon1 a = _xp.sin(dlat / 2) ** 2 + _xp.cos(lat1) * _xp.cos(lat2) * _xp.sin(dlon / 2) ** 2 c = 2 * _xp.arcsin(_xp.sqrt(a)) distances = 6371000.0 * c return _to_cpu(distances) def precompute_path_loss( self, distances: np.ndarray, frequency_mhz: float, tx_height: float, rx_height: float = 1.5, environment: str = "urban", ) -> np.ndarray: """Vectorized path loss using the appropriate propagation model. Selects model based on frequency (Phase 3.0 model selection), then applies the correct formula in a single vectorized numpy pass. Returns path loss in dB as a CPU numpy array. """ _xp = gpu_manager.get_array_module() d_arr = _xp.asarray(distances, dtype=_xp.float64) d_km = _xp.maximum(d_arr / 1000.0, 0.1) freq = float(frequency_mhz) h_tx = max(float(tx_height), 1.0) h_rx = max(float(rx_height), 1.0) log_f = _xp.log10(_xp.float64(freq)) log_hb = _xp.log10(_xp.float64(max(h_tx, 1.0))) if freq > 2000: # Free-Space Path Loss: FSPL = 20*log10(d_km) + 20*log10(f) + 32.45 L = 20.0 * _xp.log10(d_km) + 20.0 * log_f + 32.45 elif freq > 1500: # COST-231 Hata: extends Okumura-Hata to 1500-2000 MHz a_hm = (1.1 * log_f - 0.7) * h_rx - (1.56 * log_f - 0.8) L = (46.3 + 33.9 * log_f - 13.82 * log_hb - a_hm + (44.9 - 6.55 * log_hb) * _xp.log10(d_km)) if environment == "urban": L += 3.0 # Metropolitan center correction elif freq >= 150: # Okumura-Hata: 150-1500 MHz if environment == "urban" and freq >= 400: a_hm = 3.2 * (_xp.log10(11.75 * h_rx) ** 2) - 4.97 else: a_hm = (1.1 * log_f - 0.7) * h_rx - (1.56 * log_f - 0.8) L_urban = (69.55 + 26.16 * log_f - 13.82 * log_hb - a_hm + (44.9 - 6.55 * log_hb) * _xp.log10(d_km)) if environment == "suburban": L = L_urban - 2 * (_xp.log10(freq / 28) ** 2) - 5.4 elif environment == "rural": L = L_urban - 4.78 * (log_f ** 2) + 18.33 * log_f - 35.94 elif environment == "open": L = L_urban - 4.78 * (log_f ** 2) + 18.33 * log_f - 40.94 else: L = L_urban else: # Very low frequency — Longley-Rice simplified (area mode) # Use FSPL as baseline with terrain roughness correction L = 20.0 * _xp.log10(d_km) + 20.0 * log_f + 32.45 + 10.0 return _to_cpu(L) def batch_terrain_los( self, site_lat: float, site_lon: float, site_height: float, site_elevation: float, grid_lats: np.ndarray, grid_lons: np.ndarray, grid_elevations: np.ndarray, distances: np.ndarray, frequency_mhz: float, terrain_cache: dict, num_samples: int = 30, ) -> tuple[np.ndarray, np.ndarray]: """Batch compute terrain LOS and diffraction loss for all grid points. This is the key GPU optimization — instead of sampling terrain profiles one point at a time, we sample ALL profiles in parallel using vectorized operations. Args: site_lat, site_lon: Site coordinates site_height: Antenna height above ground (meters) site_elevation: Ground elevation at site (meters) grid_lats, grid_lons: All grid point coordinates grid_elevations: Ground elevation at each grid point distances: Pre-computed distances from site to each point (meters) frequency_mhz: Frequency for diffraction calculation terrain_cache: Dict[tile_name -> numpy array] from terrain_service num_samples: Number of samples per terrain profile Returns: (has_los, terrain_loss) - both shape (N,) has_los: boolean array, True if clear line of sight terrain_loss: diffraction loss in dB (0 if has_los) """ _xp = gpu_manager.get_array_module() N = len(grid_lats) if N == 0: return np.array([], dtype=bool), np.array([], dtype=np.float64) # Convert inputs to GPU arrays g_lats = _xp.asarray(grid_lats, dtype=_xp.float64) g_lons = _xp.asarray(grid_lons, dtype=_xp.float64) g_elevs = _xp.asarray(grid_elevations, dtype=_xp.float64) g_dists = _xp.asarray(distances, dtype=_xp.float64) # Heights tx_total = float(site_elevation + site_height) rx_height = 1.5 # Receiver height above ground # Earth curvature constants EARTH_RADIUS = 6371000.0 K_FACTOR = 4.0 / 3.0 effective_radius = K_FACTOR * EARTH_RADIUS # Sample terrain profiles for all points at once # Create sample positions: shape (N, num_samples) t = _xp.linspace(0, 1, num_samples, dtype=_xp.float64) # (S,) t = t.reshape(1, -1) # (1, S) # Interpolate lat/lon for all sample points # sample_lats[i, j] = site_lat + t[j] * (grid_lats[i] - site_lat) dlat = g_lats.reshape(-1, 1) - site_lat # (N, 1) dlon = g_lons.reshape(-1, 1) - site_lon # (N, 1) sample_lats = site_lat + t * dlat # (N, S) sample_lons = site_lon + t * dlon # (N, S) # Sample distances along path: shape (N, S) sample_dists = t * g_dists.reshape(-1, 1) # (N, S) # Get terrain elevations for all samples # This is the tricky part - we need to look up from the tile cache # For GPU efficiency, we'll do this on CPU then transfer sample_lats_cpu = _to_cpu(sample_lats).flatten() sample_lons_cpu = _to_cpu(sample_lons).flatten() # Batch elevation lookup from cache sample_elevs_cpu = self._batch_elevation_lookup( sample_lats_cpu, sample_lons_cpu, terrain_cache ) sample_elevs = _xp.asarray(sample_elevs_cpu, dtype=_xp.float64).reshape(N, num_samples) # Compute LOS line height at each sample point # Linear interpolation from tx to rx rx_total = g_elevs + rx_height # (N,) los_heights = tx_total + t * (rx_total.reshape(-1, 1) - tx_total) # (N, S) # Earth curvature correction at each sample total_dist = g_dists.reshape(-1, 1) # (N, 1) d = sample_dists # (N, S) curvature = (d * (total_dist - d)) / (2 * effective_radius) # (N, S) los_heights_corrected = los_heights - curvature # (N, S) # Clearance at each sample point clearances = los_heights_corrected - sample_elevs # (N, S) # Minimum clearance per profile min_clearances = _xp.min(clearances, axis=1) # (N,) # Has LOS if minimum clearance > 0 has_los = min_clearances > 0 # (N,) # Diffraction loss for points without LOS # Using simplified ITU-R P.526 formula terrain_loss = _xp.zeros(N, dtype=_xp.float64) # Only compute diffraction where blocked blocked_mask = ~has_los blocked_clearances = min_clearances[blocked_mask] if _xp.any(blocked_mask): # v = |clearance| / 10 (simplified Fresnel parameter) v = _xp.abs(blocked_clearances) / 10.0 # Diffraction loss formula from ITU-R P.526 loss = _xp.where( v <= 0, _xp.zeros_like(v), _xp.where( v < 2.4, 6.02 + 9.11 * v + 1.65 * v ** 2, 12.95 + 20 * _xp.log10(v) ) ) # Cap at reasonable max loss = _xp.minimum(loss, 40.0) terrain_loss[blocked_mask] = loss return _to_cpu(has_los).astype(bool), _to_cpu(terrain_loss) def _batch_elevation_lookup( self, lats: np.ndarray, lons: np.ndarray, terrain_cache: dict, ) -> np.ndarray: """Look up elevations from cached terrain tiles. Vectorized implementation: processes per-tile (1-4 tiles) instead of per-point (thousands of points). Inner operations are all NumPy vectorized. Args: lats, lons: Flattened arrays of coordinates terrain_cache: Dict mapping tile_name -> numpy array Returns: elevations: Same shape as input lats """ elevations = np.zeros(len(lats), dtype=np.float64) # Vectorized tile identification lat_ints = np.floor(lats).astype(int) lon_ints = np.floor(lons).astype(int) # Process per tile (usually 1-4 tiles, not per point) unique_tiles = set(zip(lat_ints, lon_ints)) for lat_int, lon_int in unique_tiles: lat_letter = 'N' if lat_int >= 0 else 'S' lon_letter = 'E' if lon_int >= 0 else 'W' tile_name = f"{lat_letter}{abs(lat_int):02d}{lon_letter}{abs(lon_int):03d}" tile = terrain_cache.get(tile_name) if tile is None: continue # Mask for points in this tile mask = (lat_ints == lat_int) & (lon_ints == lon_int) tile_lats = lats[mask] tile_lons = lons[mask] size = tile.shape[0] # Vectorized row/col calculation rows = ((1 - (tile_lats - lat_int)) * (size - 1)).astype(int) cols = ((tile_lons - lon_int) * (size - 1)).astype(int) rows = np.clip(rows, 0, size - 1) cols = np.clip(cols, 0, size - 1) # Vectorized lookup - single operation for ALL points in tile tile_elevs = tile[rows, cols].astype(np.float64) tile_elevs[tile_elevs == -32768] = 0.0 elevations[mask] = tile_elevs return elevations def batch_antenna_pattern( self, site_lat: float, site_lon: float, grid_lats: np.ndarray, grid_lons: np.ndarray, azimuth: float, beamwidth: float, ) -> np.ndarray: """Batch compute antenna pattern loss for all grid points. Returns antenna_loss in dB, shape (N,) """ _xp = gpu_manager.get_array_module() N = len(grid_lats) if N == 0 or azimuth is None or not beamwidth: return np.zeros(N, dtype=np.float64) # Convert to radians lat1 = _xp.radians(_xp.float64(site_lat)) lon1 = _xp.radians(_xp.float64(site_lon)) lat2 = _xp.radians(_xp.asarray(grid_lats, dtype=_xp.float64)) lon2 = _xp.radians(_xp.asarray(grid_lons, dtype=_xp.float64)) # Calculate bearing from site to each point dlon = lon2 - lon1 x = _xp.sin(dlon) * _xp.cos(lat2) y = _xp.cos(lat1) * _xp.sin(lat2) - _xp.sin(lat1) * _xp.cos(lat2) * _xp.cos(dlon) bearings = (_xp.degrees(_xp.arctan2(x, y)) + 360) % 360 # Angle difference from antenna azimuth angle_diff = _xp.abs(bearings - azimuth) angle_diff = _xp.where(angle_diff > 180, 360 - angle_diff, angle_diff) # Antenna pattern loss (simplified sector pattern) half_bw = beamwidth / 2 in_main = angle_diff <= half_bw loss_main = 3 * (angle_diff / half_bw) ** 2 loss_side = 3 + 12 * ((angle_diff - half_bw) / half_bw) ** 2 loss_side = _xp.minimum(loss_side, 25.0) antenna_loss = _xp.where(in_main, loss_main, loss_side) return _to_cpu(antenna_loss) def batch_final_rsrp( self, tx_power: float, tx_gain: float, path_loss: np.ndarray, terrain_loss: np.ndarray, antenna_loss: np.ndarray, building_loss: np.ndarray, vegetation_loss: np.ndarray, rain_loss: np.ndarray, indoor_loss: np.ndarray, atmospheric_loss: np.ndarray, reflection_gain: np.ndarray, fading_margin: float = 0.0, ) -> np.ndarray: """Vectorized final RSRP calculation. RSRP = tx_power + tx_gain - path_loss - terrain_loss - antenna_loss - building_loss - vegetation_loss - rain_loss - indoor_loss - atmospheric_loss + reflection_gain - fading_margin Returns RSRP in dBm, shape (N,) """ _xp = gpu_manager.get_array_module() rsrp = ( float(tx_power) + float(tx_gain) - _xp.asarray(path_loss, dtype=_xp.float64) - _xp.asarray(terrain_loss, dtype=_xp.float64) - _xp.asarray(antenna_loss, dtype=_xp.float64) - _xp.asarray(building_loss, dtype=_xp.float64) - _xp.asarray(vegetation_loss, dtype=_xp.float64) - _xp.asarray(rain_loss, dtype=_xp.float64) - _xp.asarray(indoor_loss, dtype=_xp.float64) - _xp.asarray(atmospheric_loss, dtype=_xp.float64) + _xp.asarray(reflection_gain, dtype=_xp.float64) - float(fading_margin) ) return _to_cpu(rsrp) # Singleton gpu_service = GPUService()