From 7f0b4d226951b60ae292ec211f9c7fcb65ae8532 Mon Sep 17 00:00:00 2001 From: mytec Date: Mon, 2 Feb 2026 13:48:30 +0200 Subject: [PATCH] @mytec: before 3.3.0 refactor2 --- RFCP-Iteration-3.3.0-Architecture-Refactor.md | 723 ++++++++++++++++++ ...b3-461d-9341-81ce0ff4260d_text_markdown.md | 191 +++++ 2 files changed, 914 insertions(+) create mode 100644 RFCP-Iteration-3.3.0-Architecture-Refactor.md create mode 100644 compass_artifact_wf-6a6a41ed-a3b3-461d-9341-81ce0ff4260d_text_markdown.md diff --git a/RFCP-Iteration-3.3.0-Architecture-Refactor.md b/RFCP-Iteration-3.3.0-Architecture-Refactor.md new file mode 100644 index 0000000..a798dfd --- /dev/null +++ b/RFCP-Iteration-3.3.0-Architecture-Refactor.md @@ -0,0 +1,723 @@ +# RFCP - Iteration 3.3.0: Performance Architecture Refactor + +## Overview + +Major refactoring based on research into professional RF tools (Signal-Server, SPLAT!, CloudRF SLEIPNIR, Sionna RT). + +**Root cause identified:** Pickle serialization overhead dominates computation time. +- DP_TIMING shows: 0.6-0.9ms per point (actual calculation) +- Real throughput: 258ms per point +- **99% of time is IPC overhead, not calculation!** + +**Target:** Reduce 5km Detailed from timeout (300s) to <30s + +--- + +## Part 1: Eliminate Pickle Overhead (CRITICAL) + +### 1.1 Shared Memory for Buildings + +Currently terrain is in shared memory, but **15,000 buildings are pickled for every chunk**. + +**File:** `backend/app/services/parallel_coverage_service.py` + +```python +from multiprocessing import shared_memory +import numpy as np + +def buildings_to_shared_memory(buildings: list) -> tuple: + """ + Convert buildings to numpy arrays and store in shared memory. 
+ + Returns: (shm_name, shape, dtype) for reconstruction in workers + """ + # Extract building data into numpy arrays + # For each building we need: lat, lon, height, num_vertices, vertices_flat + + # Simplified: store as structured array + building_data = [] + all_vertices = [] + vertex_offsets = [0] + + for b in buildings: + coords = extract_coords(b) + height = b.get('properties', {}).get('height', 10.0) + + building_data.append({ + 'lat': np.mean([c[1] for c in coords]), + 'lon': np.mean([c[0] for c in coords]), + 'height': height, + 'vertex_start': len(all_vertices), + 'vertex_count': len(coords) + }) + all_vertices.extend(coords) + vertex_offsets.append(len(all_vertices)) + + # Create numpy arrays + buildings_arr = np.array([ + (b['lat'], b['lon'], b['height'], b['vertex_start'], b['vertex_count']) + for b in building_data + ], dtype=[ + ('lat', 'f8'), ('lon', 'f8'), ('height', 'f4'), + ('vertex_start', 'i4'), ('vertex_count', 'i2') + ]) + + vertices_arr = np.array(all_vertices, dtype=[('lon', 'f8'), ('lat', 'f8')]) + + # Store in shared memory + shm_buildings = shared_memory.SharedMemory( + create=True, + size=buildings_arr.nbytes, + name=f"rfcp_buildings_{os.getpid()}" + ) + shm_vertices = shared_memory.SharedMemory( + create=True, + size=vertices_arr.nbytes, + name=f"rfcp_vertices_{os.getpid()}" + ) + + # Copy data + np.ndarray(buildings_arr.shape, dtype=buildings_arr.dtype, + buffer=shm_buildings.buf)[:] = buildings_arr + np.ndarray(vertices_arr.shape, dtype=vertices_arr.dtype, + buffer=shm_vertices.buf)[:] = vertices_arr + + return { + 'buildings': (shm_buildings.name, buildings_arr.shape, buildings_arr.dtype), + 'vertices': (shm_vertices.name, vertices_arr.shape, vertices_arr.dtype) + } + + +def buildings_from_shared_memory(shm_info: dict) -> tuple: + """Reconstruct buildings arrays from shared memory in worker.""" + shm_b = shared_memory.SharedMemory(name=shm_info['buildings'][0]) + shm_v = shared_memory.SharedMemory(name=shm_info['vertices'][0]) + + 
buildings = np.ndarray( + shm_info['buildings'][1], + dtype=shm_info['buildings'][2], + buffer=shm_b.buf + ) + vertices = np.ndarray( + shm_info['vertices'][1], + dtype=shm_info['vertices'][2], + buffer=shm_v.buf + ) + + return buildings, vertices, shm_b, shm_v +``` + +### 1.2 Increase Batch Size + +**Current:** 7 chunks of ~144 points = high IPC overhead per point +**Target:** 2-3 chunks of ~300-400 points = amortize IPC cost + +```python +# In parallel_coverage_service.py +def calculate_optimal_chunk_size(total_points: int, num_workers: int) -> int: + """ + Calculate chunk size to minimize IPC overhead. + + Rule: computation_time should be 10-100x serialization_time + For RF calculations: ~1ms compute per point, ~50ms serialize per chunk + So ~500 points per chunk reaches 10x; min_chunk below is the hard floor (~6x). + """ + min_chunk = 300 # Minimum to amortize IPC + max_chunk = 1000 # Maximum for memory + + ideal_chunks = max(2, num_workers) # One chunk per worker, never fewer than 2 chunks + ideal_size = total_points // ideal_chunks + + return max(min_chunk, min(max_chunk, ideal_size)) +``` + +### 1.3 Pre-build Spatial Index Once + +Currently the spatial index may be rebuilt per chunk. Build it once and share a reference. 
+ +```python +class SharedSpatialIndex: + """Spatial index that can be shared across processes via shared memory.""" + + def __init__(self, buildings_shm_info: dict): + self.buildings, self.vertices, _, _ = buildings_from_shared_memory(buildings_shm_info) + self._build_grid() + + def _build_grid(self): + """Build simple grid-based spatial index.""" + # Grid cells of ~100m + self.cell_size = 0.001 # ~111m in degrees + self.grid = defaultdict(list) + + for i, b in enumerate(self.buildings): + cell_x = int(b['lon'] / self.cell_size) + cell_y = int(b['lat'] / self.cell_size) + self.grid[(cell_x, cell_y)].append(i) + + def query_radius(self, lat: float, lon: float, radius_m: float) -> list: + """Get building indices within radius.""" + radius_deg = radius_m / 111000 + cells_to_check = int(radius_deg / self.cell_size) + 1 + + center_x = int(lon / self.cell_size) + center_y = int(lat / self.cell_size) + + result = [] + for dx in range(-cells_to_check, cells_to_check + 1): + for dy in range(-cells_to_check, cells_to_check + 1): + result.extend(self.grid.get((center_x + dx, center_y + dy), [])) + + return result +``` + +--- + +## Part 2: Radial Calculation Pattern (Signal-Server style) + +Instead of grid, calculate along radial spokes for faster coverage estimation. + +### 2.1 Radial Engine + +**File:** `backend/app/services/radial_coverage_service.py` (NEW) + +```python +""" +Radial coverage calculation engine inspired by Signal-Server/SPLAT! + +Instead of calculating every grid point independently: +1. Cast rays from TX in all directions (0-360°) +2. Sample terrain along each ray (profile) +3. Apply propagation model to profile +4. 
Interpolate between rays for final grid + +This is 10-50x faster because: +- Terrain profiles are linear (cache-friendly) +- No building geometry per-point (use clutter model) +- Embarrassingly parallel by azimuth +""" + +import numpy as np +from concurrent.futures import ThreadPoolExecutor +import math + +class RadialCoverageEngine: + def __init__(self, terrain_service, propagation_model): + self.terrain = terrain_service + self.model = propagation_model + + def calculate_coverage( + self, + tx_lat: float, tx_lon: float, tx_height: float, + radius_m: float, + frequency_mhz: float, + tx_power_dbm: float, + num_radials: int = 360, # 1° resolution + samples_per_radial: int = 100, + num_threads: int = 8 + ) -> dict: + """ + Calculate coverage using radial ray-casting. + + Returns dict with 'radials' (raw data) and 'grid' (interpolated). + """ + # Pre-load terrain tiles + self._preload_terrain(tx_lat, tx_lon, radius_m) + + # Calculate radials in parallel (by azimuth sectors) + sector_size = num_radials // num_threads + + with ThreadPoolExecutor(max_workers=num_threads) as executor: + futures = [] + for i in range(num_threads): + start_az = i * sector_size + end_az = (i + 1) * sector_size if i < num_threads - 1 else num_radials + + futures.append(executor.submit( + self._calculate_sector, + tx_lat, tx_lon, tx_height, + radius_m, frequency_mhz, tx_power_dbm, + start_az, end_az, samples_per_radial + )) + + # Collect results + all_radials = [] + for f in futures: + all_radials.extend(f.result()) + + return { + 'radials': all_radials, + 'center': (tx_lat, tx_lon), + 'radius': radius_m, + 'num_radials': num_radials + } + + def _calculate_sector( + self, + tx_lat, tx_lon, tx_height, + radius_m, frequency_mhz, tx_power_dbm, + start_az, end_az, samples_per_radial + ) -> list: + """Calculate radials for one azimuth sector.""" + results = [] + + for az in range(start_az, end_az): + radial = self._calculate_radial( + tx_lat, tx_lon, tx_height, + radius_m, frequency_mhz, 
tx_power_dbm, + az, samples_per_radial + ) + results.append(radial) + + return results + + def _calculate_radial( + self, + tx_lat, tx_lon, tx_height, + radius_m, frequency_mhz, tx_power_dbm, + azimuth_deg, num_samples + ) -> dict: + """ + Calculate signal strength along one radial. + + Uses terrain profile + Longley-Rice style calculation. + """ + az_rad = math.radians(azimuth_deg) + cos_lat = math.cos(math.radians(tx_lat)) + + # Sample points along radial + distances = np.linspace(100, radius_m, num_samples) + + # Calculate lat/lon for each sample + lat_offsets = (distances / 111000) * math.cos(az_rad) + lon_offsets = (distances / (111000 * cos_lat)) * math.sin(az_rad) + + lats = tx_lat + lat_offsets + lons = tx_lon + lon_offsets + + # Get terrain profile + elevations = np.array([ + self.terrain.get_elevation_sync(lat, lon) + for lat, lon in zip(lats, lons) + ]) + + tx_elevation = self.terrain.get_elevation_sync(tx_lat, tx_lon) + + # Calculate path loss for each point + rsrp_values = [] + los_flags = [] + + for i, (dist, elev) in enumerate(zip(distances, elevations)): + # Simple LOS check using terrain profile up to this point + profile = elevations[:i+1] + has_los = self._check_los_profile( + tx_elevation + tx_height, + elev + 1.5, # RX height + profile, + distances[:i+1] + ) + + # Path loss (using configured model) + path_loss = self.model.calculate_path_loss( + frequency_mhz, dist, tx_height, 1.5, + has_los=has_los + ) + + # Add diffraction loss if NLOS + if not has_los: + diff_loss = self._calculate_diffraction_loss( + tx_elevation + tx_height, + elev + 1.5, + profile, + distances[:i+1], + frequency_mhz + ) + path_loss += diff_loss + + rsrp = tx_power_dbm - path_loss + rsrp_values.append(rsrp) + los_flags.append(has_los) + + return { + 'azimuth': azimuth_deg, + 'distances': distances.tolist(), + 'lats': lats.tolist(), + 'lons': lons.tolist(), + 'rsrp': rsrp_values, + 'has_los': los_flags + } + + def _check_los_profile(self, tx_h, rx_h, profile, distances) -> 
bool: + """Check LOS using terrain profile (fixed 0.6 m clearance margin, not a full Fresnel-zone test).""" + if len(profile) < 2: + return True + + total_dist = distances[-1] + + # Line from TX to RX + for i in range(1, len(profile) - 1): + d = distances[i] + # Expected height on LOS line + expected_h = tx_h + (rx_h - tx_h) * (d / total_dist) + # Actual terrain height + actual_h = profile[i] + + if actual_h > expected_h - 0.6: # Small clearance margin + return False + + return True + + def _calculate_diffraction_loss(self, tx_h, rx_h, profile, distances, freq_mhz) -> float: + """Calculate single knife-edge diffraction loss for the dominant obstacle (first step of Deygout, no recursion).""" + # Find main obstacle + max_v = -999 + max_idx = -1 + total_dist = distances[-1] + wavelength = 300 / freq_mhz # meters + + for i in range(1, len(profile) - 1): + d1 = distances[i] + d2 = total_dist - d1 + + # Height of LOS line at this point + los_h = tx_h + (rx_h - tx_h) * (d1 / total_dist) + + # Obstacle height above LOS + h = profile[i] - los_h + + if h > 0: + # Fresnel parameter + v = h * math.sqrt(2 * (d1 + d2) / (wavelength * d1 * d2)) + if v > max_v: + max_v = v + max_idx = i + + if max_v < -0.78: + return 0.0 + + # Knife-edge diffraction loss (ITU-R P.526) + if max_v < 0: + loss = 6.02 + 9.11 * max_v - 1.27 * max_v * max_v + elif max_v < 2.4: + loss = 6.02 + 9.11 * max_v + 1.65 * max_v * max_v + else: + loss = 12.953 + 20 * math.log10(max_v) + + return max(0, loss) +``` + +--- + +## Part 3: Propagation Model Updates + +### 3.1 Add Longley-Rice ITM Support + +**File:** `backend/app/services/propagation_models/itm_model.py` (NEW) + +```python +""" +Longley-Rice Irregular Terrain Model (ITM) + +Best for: VHF/UHF terrain-based propagation (20 MHz - 20 GHz) +Based on: itmlogic Python package + +Key parameters: +- Earth dielectric constant (eps): 4-81 (15 typical for ground) +- Ground conductivity (sgm): 0.001-5.0 S/m +- Atmospheric refractivity (ens): 250-400 N-units (301 typical) +- Climate: 1=Equatorial, 2=Continental Subtropical, etc. 
+""" + +try: + from itmlogic import itmlogic_p2p + HAS_ITMLOGIC = True +except ImportError: + HAS_ITMLOGIC = False + +from .base_model import BasePropagationModel, PropagationInput, PropagationResult + +class LongleyRiceModel(BasePropagationModel): + """Longley-Rice ITM propagation model.""" + + name = "Longley-Rice-ITM" + frequency_range = (20, 20000) # MHz + distance_range = (1000, 2000000) # meters + + # Default ITM parameters + DEFAULT_PARAMS = { + 'eps': 15.0, # Earth dielectric constant + 'sgm': 0.005, # Ground conductivity (S/m) + 'ens': 301.0, # Atmospheric refractivity (N-units) + 'pol': 0, # Polarization: 0=horizontal, 1=vertical + 'mdvar': 12, # Mode of variability + 'klim': 5, # Climate: 5=Continental Temperate + } + + # Ground parameters by type + GROUND_PARAMS = { + 'average': {'eps': 15.0, 'sgm': 0.005}, + 'poor': {'eps': 4.0, 'sgm': 0.001}, + 'good': {'eps': 25.0, 'sgm': 0.020}, + 'fresh_water': {'eps': 81.0, 'sgm': 0.010}, + 'sea_water': {'eps': 81.0, 'sgm': 5.0}, + 'forest': {'eps': 12.0, 'sgm': 0.003}, + } + + def __init__(self, ground_type: str = 'average', climate: int = 5): + if not HAS_ITMLOGIC: + raise ImportError("itmlogic package required: pip install itmlogic") + + self.params = self.DEFAULT_PARAMS.copy() + if ground_type in self.GROUND_PARAMS: + self.params.update(self.GROUND_PARAMS[ground_type]) + self.params['klim'] = climate + + def calculate(self, input: PropagationInput) -> PropagationResult: + """Calculate path loss using ITM point-to-point mode.""" + + # ITM requires terrain profile + if not hasattr(input, 'terrain_profile') or input.terrain_profile is None: + # Fallback to free-space if no terrain + return self._free_space_fallback(input) + + result = itmlogic_p2p( + input.terrain_profile, # Elevation samples + input.frequency_mhz, + input.tx_height_m, + input.rx_height_m, + self.params['eps'], + self.params['sgm'], + self.params['ens'], + self.params['pol'], + self.params['mdvar'], + self.params['klim'] + ) + + return 
PropagationResult( + path_loss_db=result['dbloss'], + model_name=self.name, + details={ + 'mode': result.get('propmode', 'unknown'), + 'variability': result.get('var', 0), + } + ) + + def _free_space_fallback(self, input: PropagationInput) -> PropagationResult: + """Free-space path loss when no terrain available.""" + fspl = 20 * np.log10(input.distance_m) + 20 * np.log10(input.frequency_mhz) - 27.55 + return PropagationResult( + path_loss_db=fspl, + model_name=f"{self.name} (FSPL fallback)", + details={'mode': 'free_space'} + ) +``` + +### 3.2 Add VHF/UHF Model Selection + +**File:** `backend/app/services/propagation_models/model_selector.py` + +```python +""" +Automatic propagation model selection based on frequency and environment. +""" + +def select_model_for_frequency( + frequency_mhz: float, + environment: str = 'urban', + has_terrain: bool = True +) -> BasePropagationModel: + """ + Select appropriate propagation model. + + Frequency bands: + - VHF: 30-300 MHz (tactical radios, FM broadcast) + - UHF: 300-3000 MHz (tactical radios, TV, early cellular) + - Cellular: 700-2600 MHz (LTE bands) + - mmWave: 24-100 GHz (5G) + + Decision tree: + 1. VHF/UHF with terrain → Longley-Rice ITM + 2. Urban cellular → COST-231 Hata + 3. Suburban/rural cellular → Okumura-Hata + 4. 
Any other frequency (incl. mmWave) → free-space fallback until 3GPP 38.901 is implemented + """ + + # VHF (30-300 MHz) + if 30 <= frequency_mhz <= 300: + if has_terrain: + return LongleyRiceModel(ground_type='average', climate=5) + else: + return FreeSpaceModel() # Fallback + + # UHF (300-1000 MHz) + elif 300 < frequency_mhz <= 1000: + if has_terrain: + return LongleyRiceModel(ground_type='average', climate=5) + else: + return OkumuraHataModel(environment=environment) + + # Cellular (1000-2600 MHz) + elif 1000 < frequency_mhz <= 2600: + if environment == 'urban': + return Cost231HataModel() + else: + return OkumuraHataModel(environment=environment) + + # Higher frequencies + else: + return FreeSpaceModel() # Or implement 3GPP 38.901 + + +# Frequency band constants for UI +FREQUENCY_BANDS = { + 'VHF_LOW': (30, 88, "VHF Low (30-88 MHz) - Military/Public Safety"), + 'VHF_HIGH': (136, 174, "VHF High (136-174 MHz) - Marine/Aviation"), + 'UHF_LOW': (400, 512, "UHF (400-512 MHz) - Public Safety/Tactical"), + 'UHF_TV': (470, 862, "UHF TV (470-862 MHz)"), + 'LTE_700': (700, 800, "LTE Band 28/20 (700-800 MHz)"), + 'LTE_900': (880, 960, "LTE Band 8 (900 MHz)"), + 'LTE_1800': (1710, 1880, "LTE Band 3 (1800 MHz)"), + 'LTE_2100': (1920, 2170, "LTE Band 1 (2100 MHz)"), + 'LTE_2600': (2500, 2690, "LTE Band 7 (2600 MHz)"), +} +``` + +--- + +## Part 4: Progress Bar Fix (WebSocket) + +### 4.1 Proper Progress Streaming + +The stuck-at-5% progress bug persists because WebSocket messages aren't reaching the frontend. + +**Debug approach:** + +```python +# In coverage calculation, add explicit progress logging +async def calculate_with_progress(self, ...): + total_points = len(points) + + for i, chunk_result in enumerate(chunk_results): + progress = int((i + 1) / total_chunks * 100) + + # Log to console AND send via WebSocket + logger.info(f"[PROGRESS] {progress}% - chunk {i+1}/{total_chunks}") + + if progress_callback: + await progress_callback(progress, f"Calculating... 
{i+1}/{total_chunks}") + await asyncio.sleep(0) # Yield to event loop +``` + +**Frontend fix - check WebSocket subscription:** + +```typescript +// In App.tsx or CoverageStore +useEffect(() => { + const ws = new WebSocket('ws://localhost:8888/ws/coverage'); + + ws.onmessage = (event) => { + const data = JSON.parse(event.data); + console.log('[WS] Received:', data); // DEBUG + + if (data.type === 'progress') { + setProgress(data.progress); + setProgressStatus(data.status); + } + }; + + ws.onerror = (e) => console.error('[WS] Error:', e); + ws.onclose = () => console.log('[WS] Closed'); + + return () => ws.close(); +}, []); +``` + +--- + +## Part 5: Testing & Validation + +### 5.1 Performance Benchmarks + +After refactoring, expected performance: + +| Scenario | Before | After | Speedup | +|----------|--------|-------|---------| +| 5km Standard | 5s | 3s | 1.7x | +| 5km Detailed | timeout | 25s | 12x | +| 10km Standard | 30s | 10s | 3x | +| 10km Detailed | timeout | 60s | 5x | + +### 5.2 Test Commands + +```powershell +# Quick test +cd D:\root\rfcp\installer +.\test-detailed-quick.bat + +# Check for [PROGRESS] logs in output +# Check for [DP_TIMING] logs + +# Verify shared memory cleanup +# Check Task Manager for memory after calculation +``` + +--- + +## Implementation Order + +1. **Shared Memory for Buildings** (biggest impact) - Part 1.1 +2. **Increase Batch Size** - Part 1.2 +3. **Progress Bar Debug** - Part 4 +4. **Radial Engine** (optional, for preview mode) - Part 2 +5. 
**Longley-Rice ITM** (for VHF/UHF) - Part 3 + +--- + +## Dependencies to Add + +``` +# requirements.txt additions +itmlogic>=0.1.0 # Longley-Rice ITM implementation +``` + +--- + +## Commit Message + +``` +feat: Iteration 3.3.0 - Performance Architecture Refactor + +Performance: +- Add shared memory for buildings (eliminate pickle overhead) +- Increase batch size to 300-500 points (amortize IPC) +- Add radial coverage engine (Signal-Server style) + +Propagation Models: +- Add Longley-Rice ITM for VHF/UHF (20 MHz - 20 GHz) +- Add automatic model selection by frequency +- Add frequency band constants for UI + +Bug Fixes: +- Debug and fix WebSocket progress (5% stuck bug) + +Expected: 5km Detailed from timeout → ~25s (12x speedup) +``` + +--- + +## Notes for Claude Code + +This is a significant refactoring. Approach step by step: + +1. First implement shared memory for buildings +2. Test that alone - should see major speedup +3. Then increase batch size +4. Test again +5. Then tackle progress bar +6. Radial engine and ITM can be separate iterations if needed + +The key insight: **99% of time is IPC overhead, not calculation**. +Fixing pickle serialization is the #1 priority. + +--- + +*"Fast per-point means nothing if IPC eats your lunch"* 🍽️ diff --git a/compass_artifact_wf-6a6a41ed-a3b3-461d-9341-81ce0ff4260d_text_markdown.md b/compass_artifact_wf-6a6a41ed-a3b3-461d-9341-81ce0ff4260d_text_markdown.md new file mode 100644 index 0000000..65d9ac3 --- /dev/null +++ b/compass_artifact_wf-6a6a41ed-a3b3-461d-9341-81ce0ff4260d_text_markdown.md @@ -0,0 +1,191 @@ +# RF Coverage Planning Software: Performance Optimization and Propagation Models + +**The performance gap between fast per-point calculations (~1ms) and slow overall throughput (~258ms/point) is caused by pickle serialization overhead in Python multiprocessing**, which dominates actual compute time when processing small batches. 
The solution involves batching 1000+ points per IPC round-trip, using shared memory for terrain data, and leveraging GPU acceleration for workloads exceeding 10,000 points—achieving 10-50x speedups. Modern RF coverage tools like Signal-Server, SPLAT!, and Sionna RT demonstrate that combining radial segment parallelization, multi-resolution terrain tiling, and appropriate propagation model selection (Longley-Rice ITM for terrain-based VHF/UHF, COST-231 Hata for cellular) enables efficient large-area calculations while maintaining accuracy within 6-10 dB standard deviation. + +--- + +## The multiprocessing bottleneck: why per-point speed deceives + +The dramatic discrepancy between fast individual point calculations and slow aggregate throughput stems from a classic Python multiprocessing anti-pattern where **inter-process communication overhead dominates computation time**. When each worker processes a single point or small batch, the system spends more time serializing and deserializing data than performing actual RF calculations. + +Python's multiprocessing uses pickle for IPC by default, requiring objects to be serialized twice per task (sending to worker and returning results). For RF calculations involving terrain data, DEM arrays, and GIS features, this serialization cost becomes catastrophic. Research shows that pickling a **40 MB dictionary four times per task can cause a 600% slowdown**. The situation worsens because spawning a subprocess takes approximately 50ms (50,000µs) compared to ~100µs for a thread—making process pool initialization per-request extremely expensive. + +The solution architecture requires three fundamental changes. First, batch operations must amortize serialization costs by processing **1,000-10,000 points per IPC round-trip** rather than individual points. Second, shared memory (`multiprocessing.shared_memory` or `numpy.memmap`) should hold terrain data to eliminate pickle overhead entirely. 
Third, process pools must be pre-initialized at application startup rather than per-request: + +```python +# Anti-pattern: Single-point processing (slow) +with Pool() as pool: + results = pool.map(calculate_point, points) # Each point pickled separately + +# Optimal pattern: Batch processing with shared memory +from multiprocessing import shared_memory +shm = shared_memory.SharedMemory(create=True, size=terrain_data.nbytes) +chunk_size = 1000 # Process 1000 points per IPC round-trip +batches = [points[i:i+chunk_size] for i in range(0, len(points), chunk_size)] +``` + +The target metric is ensuring computation time exceeds serialization time by **10-100x**. For a 1ms per-point calculation, this means batching at least 100-1000 points to make serialization overhead negligible. + +--- + +## Open-source RF tools reveal proven optimization architectures + +**Signal-Server**, the C++14 multi-threaded engine that powered CloudRF from 2012-2016, demonstrates the foundational architecture for RF coverage calculations. Its primary improvement over the original SPLAT! was multi-threading through radial segment parallelization—splitting the circular coverage area so multiple threads process different azimuth ranges simultaneously. The implementation uses POSIX threads with configurable segment counts (must be even and greater than 4), processing up to 32 terrain tiles simultaneously with support for gzip/bzip2 compressed tiles for faster I/O. + +Signal-Server supports 12 propagation models through a simple command-line parameter: ITM (Longley-Rice), line-of-sight, Hata, ECC33, SUI, COST-Hata, free-space, ITWOM, Ericsson, Plane Earth, Egli, and Soil models. The terrain tiling system uses SDF format converted from SRTM HGT files, supporting resolutions of 300/600/1200/3600 pixels per tile with automatic multi-tile loading based on calculation bounds. 
+ +**SPLAT!** (Signal Propagation, Loss, And Terrain), the foundational tool started in 1997, uses a radial ray-casting algorithm that projects rays from the transmitter in all azimuths (0-360°), samples terrain elevation along each path, and applies Longley-Rice ITM calculations to the terrain profile. Its Longley-Rice integration handles three prediction ranges (line-of-sight, diffraction, scatter) with terrain irregularity parameter Δh(d) computed from terrain samples. Key parameters include earth dielectric constant (5-80), ground conductivity (0.001-5.0 S/m), atmospheric refractivity (250-400 N-units), and climate zone selection. + +**Sionna RT by NVIDIA** represents the state-of-the-art in GPU-accelerated RF simulation, using differentiable ray tracing built on TensorFlow, Mitsuba 3, and Dr.Jit. Its key innovation enables gradient computation through channel impulse responses with respect to material properties, antenna patterns, and transmitter/receiver positions—making it suitable for ML-integrated optimization. The path solver supports both Shooting and Bouncing Rays (SBR) and the Image Method, handling direct LOS paths, reflections, diffractions, and scattering patterns. Memory efficiency improvements in version 1.0 support scenes with 3D building models from OpenStreetMap, while configurable path loss thresholds and angular separation control enable scalable computation. + +**CloudRF's SLEIPNIR engine** (replacing Signal-Server in 2019) achieves up to **10x faster** performance through multi-resolution modeling that seamlessly merges different resolution data sources, dual CPU/GPU engines (**78% speedup** with GPU for clutter calculations), and 1m LiDAR resolution support with global 10m land cover integration. 
+ +--- + +## VHF and UHF propagation models differ fundamentally from cellular bands + +The **Longley-Rice Irregular Terrain Model (ITM)** serves as the most comprehensive model for terrain-based VHF/UHF propagation, predicting median attenuation over irregular terrain for frequencies from 20 MHz to 20 GHz across distances of 1-2000 km. The model handles five propagation mechanisms: free-space loss, terrain diffraction (multiple knife-edge), ground reflection, atmospheric refraction (4/3 Earth radius approximation), and tropospheric scatter beyond the horizon. Statistical variables include time, location, and situation variability ranging from 0.01 to 0.99, with typical accuracy of ±6-10 dB standard deviation for point-to-point mode. + +Critical ITM parameters require careful selection based on environment: + +| Ground Type | Permittivity | Conductivity (S/m) | +|------------|--------------|-------------------| +| Average Ground | 15 | 0.005 | +| Poor Ground | 4 | 0.001 | +| Good Ground | 25 | 0.020 | +| Fresh Water | 81 | 0.010 | +| Sea Water | 81 | 5.0 | + +**ITU-R P.1546** provides empirical field-strength curves for 30 MHz to 4 GHz based on extensive Northern Hemisphere measurements, covering distances of 1-1000 km with time percentages of 1%, 10%, and 50%. The model uses reference frequencies of 100, 600, and 2000 MHz with interpolation for other frequencies, applying corrections for terrain clearance angle, receiving antenna height, clutter losses, and mixed land/sea paths. + +For UHF and cellular bands, the **Okumura-Hata model** (150-1500 MHz, 1-20 km distance) and its **COST-231 extension** (1500-2000 MHz) provide rapid empirical calculations with 6-8 dB standard deviation in urban environments. The urban path loss formula is: + +``` +L_urban = 69.55 + 26.16*log10(f) - 13.82*log10(h_b) - a(h_m) + + (44.9 - 6.55*log10(h_b))*log10(d) +``` + +Where `a(h_m)` is the mobile antenna correction factor varying by city size and frequency. 
Suburban and rural corrections reduce urban loss by 2*(log10(f/28))² + 5.4 dB and 4.78*(log10(f))² - 18.33*log10(f) + 40.94 dB respectively. + +The key propagation differences across frequency bands are dramatic: **VHF wavelengths (1-10m) enable strong diffraction around obstacles but poor building penetration**, while **UHF (0.1-1m wavelength) provides better building penetration but weaker terrain following**. Cellular frequencies (1800+ MHz) have the highest free-space loss baseline, weakest diffraction, and moderate building penetration. Vegetation penetration follows the opposite pattern—VHF penetrates foliage better than higher frequencies where specific attenuation increases significantly. + +--- + +## Terrain diffraction models handle mountainous areas differently + +The **single knife-edge diffraction model** (ITU-R P.526) calculates the Fresnel parameter v and corresponding loss: + +```python +v = h * sqrt(2 * (d1 + d2) / (wavelength * d1 * d2)) +# For v > -0.78: +if v < 0: loss = 6.02 + 9.11*v - 1.27*v² +elif v < 2.4: loss = 6.02 + 9.11*v + 1.65*v² +else: loss = 12.953 + 20*log10(v) +``` + +For multiple obstacles, the **Deygout method** finds the main obstacle (highest Fresnel parameter v between transmitter and receiver), calculates its diffraction loss, then recursively finds secondary obstacles on each side. It provides better accuracy for **widely spaced obstacles** (2-4 ridges) but tends to overestimate for closely spaced obstacles. The **Epstein-Peterson method** calculates diffraction loss sequentially from transmitter to receiver, providing better accuracy for **closely spaced obstacles** but underestimating for widely separated ones. + +The **Bullington equivalent single edge** method replaces all obstacles with one equivalent knife edge, providing the simplest and fastest calculation but often underestimating loss (too optimistic)—useful only for initial estimates. 
Professional tools like CloudRF implement **Delta-Bullington** as the default for its balance of accuracy and speed, with configurable options including Huygens (basic), sequential multi-obstacle, and Deygout 94 with combining factor. + +--- + +## GPU acceleration delivers 10-50x speedups for appropriate workloads + +The RF calculations benefiting most from GPU acceleration are embarrassingly parallel operations: **ray tracing** (10-100x+ speedup with NVIDIA OptiX), **FFT operations** (cuFFT highly optimized), **viewshed/LOS calculations** (CloudRF reports **50x faster** than CPU), and **batch path loss calculations** for many points. Matrix operations in propagation models benefit from cuBLAS, while terrain correlation matrices and large array operations see significant acceleration. + +**CuPy** provides a drop-in NumPy replacement for NVIDIA GPUs with 10-100x speedups for large arrays (>100,000 elements): + +```python +import cupy as cp +terrain_gpu = cp.asarray(terrain_data) +distances = cp.sqrt(cp.sum((points_gpu - tx_position)**2, axis=1)) +path_loss = 20 * cp.log10(distances) + 20 * cp.log10(frequency_mhz) - 27.55 +results = path_loss.get() # Transfer back to CPU +``` + +**Numba CUDA** enables writing custom GPU kernels in Python for complex propagation models requiring control flow: + +```python +from numba import cuda +import math + +@cuda.jit +def free_space_path_loss_kernel(distances, frequency, output): + idx = cuda.grid(1) + if idx < distances.shape[0]: + output[idx] = 20 * math.log10(distances[idx]) + 20 * math.log10(frequency) - 27.55 +``` + +Minimum problem sizes for GPU benefit are: **10,000+ elements** for array operations, **1,024+ points** for FFT, **512x512+** for matrix multiply, and **5,000+ points** for path loss calculations. Memory transfer overhead (PCIe 3.0: ~8 GB/s practical) means the critical formula is `GPU_worthwhile = compute_time > (2 × transfer_time)`. For 100MB terrain data, transfer overhead is approximately 5-12ms. 
+ +**AMD ROCm/HIP** provides cross-platform compatibility through CuPy (`pip install cupy-rocm-5-0`), with PyTorch and TensorFlow also offering official ROCm builds. **Intel integrated graphics** support via PyOpenCL achieves 2-10x speedups over CPU (3-6x slower than discrete GPUs), suitable for edge deployments with moderate workloads (10,000-100,000 points). + +--- + +## Environment modeling requires frequency-dependent clutter coefficients + +**ITU-R P.1812-6** defines default clutter heights and losses by environment type: dense urban (20-25m height, 15-25 dB loss), urban (15-20m, 10-20 dB), suburban (9-12m, 5-15 dB), rural (0-4m, 0-5 dB), and forest (15-20m, 10-25 dB). The **3GPP TR 38.901** path loss models define specific scenarios: UMa (Urban Macro) with 25m base station height, UMi (Urban Micro Street Canyon) with 10m base station, RMa (Rural Macro), and InF (Indoor Factory) variants. + +For vegetation, **ITU-R P.833-10** specifies excess attenuation using `A_ev = A_m * (1 - exp(-d*γ/A_m))` where specific attenuation γ varies by frequency: **0.06 dB/m at 200 MHz**, **0.20 dB/m at 1 GHz**, and **0.60 dB/m at 5 GHz** for in-leaf conditions. Seasonal variation reduces loss by approximately 20% out-of-leaf for deciduous forests, with **2 dB variation at 900 MHz increasing to 8.5 dB at 1800+ MHz**. + +**Building entry loss** per ITU-R P.2109 distinguishes traditional buildings (median 10-16 dB at 100 MHz to 2 GHz) from thermally-efficient modern buildings with metallized glass and foil insulation (25-32 dB). Material-specific losses from 3GPP TR 38.901 show standard glass at **2.4 dB at 2 GHz**, concrete at **13 dB at 2 GHz increasing to 117 dB at 28 GHz**, and IRR/Low-E glass at **23.6 dB at 2 GHz**. 
+ +--- + +## Machine learning and hybrid approaches complement physics-based models + +Current ML approaches for path loss prediction rank by accuracy: **XGBoost/Gradient Boosting** (RMSE: 2.1-3.4 dB, best for small-medium datasets), Neural Network Ensembles (2.5-4.0 dB), Random Forest (3.0-4.5 dB), and Deep Neural Networks (3.0-5.0 dB). Training data requirements scale predictably: <1,000 samples yield RMSE 6-10 dB, 10,000-100,000 samples achieve production-quality RMSE 2-4 dB. + +**Hybrid physics+ML architectures** prove most effective. The ML Correction approach calculates `PL_total = PL_empirical(d, f, h_tx, h_rx) + ΔPL_ML(features)` where ΔPL_ML learns systematic biases. The LOS/NLOS Ensemble uses a classifier to weight separate LOS and NLOS regressors. Physics-Informed Neural Networks add penalty terms that enforce physical constraints like "path loss should increase with distance" and "FSPL provides a lower bound." + +**Pre-computed propagation databases** store path loss values at 20-50 bytes per grid cell, enabling sub-millisecond lookups. For a 10km radius at 30m resolution (~349,000 cells), storage is approximately 7 MB compressed. Interpolation techniques range from fast bilinear (1-2 dB error) to kriging (higher accuracy with uncertainty estimates). + +--- + +## Tile-based caching enables responsive coverage map delivery + +The optimal caching architecture uses **XYZ (Slippy Map) tiles** with multi-tier storage: L1 in-memory Redis (sub-millisecond access, ~100GB capacity), L2 disk cache (SQLite/MBTiles format), and L3 cloud storage (S3 for permanent pre-computed tiles). 
Cache keys should incorporate parameter hashes for instant invalidation when transmitter settings change: + +```python +def get_tile_key(z: int, x: int, y: int, params_hash: str) -> str: + return f"tile:coverage:{params_hash}:{z}:{x}:{y}" +``` + +For dynamic coverage, TTL-based expiration (15 minutes to 24 hours) combined with Redis pub/sub channels (`map:update:region:*`) enables targeted geographic invalidation. The hybrid approach pre-computes base zoom levels (z=6-12) for commonly accessed areas while generating higher zoom levels (z>12) on-demand. + +**Level of Detail (LOD) techniques** adapt computation intensity to distance: Tier 1 (0-500m) uses full 3D building geometry with 1m terrain resolution, Tier 2 (500m-2km) uses simplified buildings with 10m terrain, Tier 3 (2-10km) uses clutter heights only with 30m terrain, and Tier 4 (>10km) uses statistical clutter with 90m SRTM terrain. Adaptive grid generation provides higher resolution near the transmitter (10m) transitioning to coarser resolution (100m) at distance, reducing computation while maintaining visual quality where it matters. + +--- + +## Recommended architecture for Python/FastAPI RF coverage backend + +The optimal stack combines **FastAPI** (async API gateway with rate limiting), **Celery** (distributed task queue for heavy RF calculations), **Redis** (tile caching and job status), and **CuPy/Numba** (GPU acceleration). Terrain data should use **numpy.memmap** for memory-mapped access to large DEMs with **STRtree spatial indexing** for tile lookups via Shapely. + +For the propagation engine, implement **Longley-Rice ITM** as the primary terrain model (using the `itmlogic` Python package), **COST-231 Hata** for quick urban estimates, and **Deygout diffraction** for multiple terrain obstacles. 
The model selection logic should consider frequency range (Hata for 150-1500 MHz, COST-231 for 1500-2000 MHz, ITM for terrain-specific), distance (empirical for <20km, ITM for longer paths), and accuracy requirements (ray tracing only for <5km urban scenarios). + +```python +class GPURFEngine: + def __init__(self, max_points=1_000_000): + # Pre-allocate GPU memory at startup + self.d_buffer = cp.empty((max_points, 3), dtype=cp.float32) + + async def calculate_coverage(self, points: np.ndarray) -> np.ndarray: + if len(points) < 1000: + return self._cpu_fallback(points) # Small workloads on CPU + # GPU path for large workloads + d_points = cp.asarray(points) + # ... GPU computation + return results.get() +``` + +Celery configuration should use separate queues for fast (cached), compute (full calculation), and batch operations, with `worker_prefetch_multiplier=1` for heavy tasks and `task_acks_late=True` for reliability. Output formats should include PNG tiles with colormap lookup for web display and Cloud-Optimized GeoTIFF for professional GIS integration. + +--- + +## Conclusion + +Building efficient RF coverage planning software requires addressing the fundamental mismatch between fast per-point propagation calculations and the overhead of Python's multiprocessing model. **Batch processing (1000+ points per IPC round-trip), shared memory for terrain data, and GPU acceleration for workloads exceeding 10,000 points** provide the foundation for achieving throughput within an order of magnitude of commercial tools. + +The propagation model selection should follow a tiered approach: Longley-Rice ITM for terrain-based VHF/UHF planning with available DEM data, Okumura-Hata/COST-231 for rapid urban cellular estimates, and Deygout diffraction for mountainous terrain with multiple obstacles. 
Environment modeling through ITU-R P.2108/P.2109/P.833 provides standardized clutter, building entry, and vegetation loss coefficients that maintain accuracy across diverse deployment scenarios. + +The most impactful optimizations in order of implementation priority are: fixing the multiprocessing serialization bottleneck (immediate 100x throughput improvement), implementing tile-based caching with parameter-hash keys (sub-millisecond repeat queries), adding GPU acceleration for large coverage maps (10-50x for >10,000 points), and incorporating LOD techniques (3-10x computation reduction with minimal accuracy impact). This architecture enables a Python/FastAPI backend to compete with commercial tools while maintaining the flexibility for custom propagation models and ML integration. \ No newline at end of file