From 6dcc5a19b9cdc435e54c8d7a56e44831f2976bba Mon Sep 17 00:00:00 2001 From: mytec Date: Tue, 3 Feb 2026 23:24:12 +0200 Subject: [PATCH] @mytec: 3.8.0 start, stable w/0 ref+ --- RFCP-3.8.0-Vectorize-Coverage-Task.md | 181 +++++++++++++++++++++++ backend/app/services/coverage_service.py | 37 +++-- frontend/src/store/coverage.ts | 6 + 3 files changed, 215 insertions(+), 9 deletions(-) create mode 100644 RFCP-3.8.0-Vectorize-Coverage-Task.md diff --git a/RFCP-3.8.0-Vectorize-Coverage-Task.md b/RFCP-3.8.0-Vectorize-Coverage-Task.md new file mode 100644 index 0000000..461a4c2 --- /dev/null +++ b/RFCP-3.8.0-Vectorize-Coverage-Task.md @@ -0,0 +1,181 @@ +# RFCP 3.8.0 — Vectorize Per-Point Coverage Calculations + +## Context + +Iteration 3.7.0 added GPU precompute for distances + base path loss (Phase 2.5). +But Phase 3 (per-point loop) still runs on CPU, one point at a time across workers. +This is where 95% of time goes on Full preset (195s for 6,642 points). + +Current pipeline: +``` +Phase 2.5 (GPU, 0.01s): distances + base path_loss → precomputed arrays +Phase 3 (CPU, 195s): per-point terrain_loss, building_loss, reflections, vegetation +``` + +Goal: Vectorize the heavy per-point calculations so GPU handles them in bulk. + +## Architecture + +The key insight: `_calculate_point_sync` (line ~1127) does these steps per point: + +1. **Terrain LOS check** — get elevation profile between site and point, check clearance +2. **Diffraction loss** — knife-edge based on Fresnel zone clearance +3. **Building obstruction** — find buildings between site and point, calculate penetration loss +4. **Materials penalty** — add loss based on building material type +5. **Dominant path analysis** — LOS vs reflection vs diffraction +6. **Street canyon** — check if point is in urban canyon +7. **Reflections** — find reflection paths off buildings (most expensive!) +8. **Vegetation loss** — check vegetation between site and point +9. 
**Final RSRP** — tx_power - path_loss - terrain_loss - building_loss - veg_loss + gains + +## Strategy: Vectorize in Stages + +NOT everything can be vectorized equally. Prioritize by time spent: + +### Stage 1: Terrain LOS + Diffraction (HIGH IMPACT) +Currently: For each point, sample ~50-100 elevation values along radial path, +find min clearance, compute knife-edge diffraction. + +**Vectorize**: Create 2D elevation profiles for ALL points at once. +- All points share the same site location +- For N points, create N terrain profiles (each M samples) +- Compute Fresnel clearance for all profiles vectorized +- Compute diffraction loss vectorized + +```python +# Instead of per-point: +for point in grid: + profile = get_terrain_profile(site, point, num_samples=50) + clearance = min_clearance(profile) + loss = diffraction_loss(clearance, freq) + +# Vectorized: +xp = gpu_manager.get_array_module() +# all_profiles shape: (N_points, M_samples) +all_profiles = get_terrain_profiles_batch(site, all_points, num_samples=50) +all_clearances = compute_clearances_batch(all_profiles, site_elev, point_elevs, distances) +all_terrain_loss = diffraction_loss_batch(all_clearances, freq) +``` + +### Stage 2: Building Obstruction (HIGH IMPACT) +Currently: For each point, find nearby buildings, check if they obstruct path. + +**Vectorize**: Use spatial indexing but batch the geometry checks. +- Pre-compute building bounding boxes as GPU arrays +- For each point, ray-building intersection can be done as matrix operation +- Building penetration loss is simple lookup after intersection + +NOTE: This is harder to vectorize because each point has different number of +nearby buildings. Options: +a) Pad to max buildings per point (wastes memory but simple) +b) Use sparse representation +c) Keep per-point but use GPU for the geometry math + +Recommend option (c) initially — keep the spatial query on CPU but move +the trig/geometry calculations to GPU. 
+ +### Stage 3: Reflections (MEDIUM IMPACT, only on Full preset) +Currently: For each point with buildings, compute reflection paths. +This is the most complex calculation and hardest to vectorize. + +**Approach**: Keep reflections per-point for now, but optimize the inner math +with vectorized operations. + +### Stage 4: Vegetation Loss (LOW IMPACT) +Simple lookup — not worth GPU overhead. + +## Implementation Plan + +### Step 1: Batch terrain profiling +Add to coverage_service.py a new method: +```python +def _batch_terrain_profiles(self, site_lat, site_lon, site_elev, + grid_lats, grid_lons, grid_elevs, + distances, frequency, num_samples=50): + """Compute terrain LOS and diffraction loss for all points at once.""" + xp = gpu_manager.get_array_module() + N = len(grid_lats) + + # Interpolate terrain profiles for all points + # Each profile: site → point, num_samples elevation values + # Use terrain tile data directly + + # Compute Fresnel zone clearance for each profile + # Compute knife-edge diffraction loss + + return terrain_losses # shape (N,) +``` + +### Step 2: Batch building check +Add method: +```python +def _batch_building_obstruction(self, site_lat, site_lon, + grid_lats, grid_lons, + distances, buildings_spatial_index, + all_buildings): + """Compute building loss for all points at once.""" + # For each point, query spatial index (CPU) + # Batch the geometry intersection math (GPU) + # Return losses + + return building_losses # shape (N,) +``` + +### Step 3: Replace _run_point_loop +Instead of ProcessPool workers, do: +```python +# In calculate_coverage, after Phase 2.5: +terrain_losses = self._batch_terrain_profiles(...) +building_losses = self._batch_building_obstruction(...) 
+ +# Final RSRP is now fully vectorized: +rsrp = tx_power - precomputed_path_loss - terrain_losses - building_losses - veg_losses +# + antenna_gains + reflection_gains +``` + +### Step 4: Keep worker fallback +If GPU not available or for very complex calculations (reflections), +fall back to the existing per-point ProcessPool approach. + +## Important Notes + +1. **GPU code only in main process** — learned from 3.7.0, never import gpu_manager in workers +2. **Terrain data access** — terrain tiles are in memory, need efficient sampling for batch profiles +3. **CuPy ↔ NumPy bridge** — use `cupy.asnumpy(arr)` or `arr.get()` to convert CuPy arrays back to CPU; NumPy has no `asnumpy`, so `xp.asnumpy()` fails when `xp` is NumPy +4. **Memory** — 6,642 points × 50 terrain samples = 332,100 float64 values ≈ 2.5 MiB on GPU, no problem +5. **Accuracy** — results must match existing per-point calculation within 1 dB + +## Testing + +```powershell +cd D:\root\rfcp\backend +pyinstaller ..\installer\rfcp-server-gpu.spec --noconfirm +.\dist\rfcp-server\rfcp-server.exe +``` + +Compare Full preset: +- Before (3.7.0): ~195s for 6,642 points +- Target (3.8.0): <30s for same calculation +- Stretch goal: <10s + +Verify accuracy: +- Run same location with GPU and CPU backend +- Compare RSRP values — should be within 1 dB +- Coverage percentages (Excellent/Good/Fair/Weak) should be very close + +## What NOT to Change + +- Don't modify propagation model math (Okumura-Hata, COST-231, Free-Space formulas) +- Don't change API endpoints or response format +- Don't remove the ProcessPool fallback — keep it for CPU-only mode +- Don't change OSM fetching or caching +- Don't modify the frontend + +## Success Criteria + +- [ ] Full preset completes in <30s (was 195s) +- [ ] Standard preset completes in <5s (was 7.2s) +- [ ] No CuPy errors in worker processes +- [ ] CPU fallback still works +- [ ] Results match within 1 dB accuracy +- [ ] GPU utilization visible in Task Manager during calculation diff --git a/backend/app/services/coverage_service.py b/backend/app/services/coverage_service.py 
index 4035dc3..54d9f0f 100644 --- a/backend/app/services/coverage_service.py +++ b/backend/app/services/coverage_service.py @@ -62,6 +62,9 @@ from app.services.parallel_coverage_service import ( calculate_coverage_parallel, get_cpu_count, get_parallel_backend, CancellationToken, ) +# NOTE: gpu_manager and gpu_service are imported INSIDE functions that need them, +# NOT at module level. This prevents worker processes from initializing CuPy/CUDA +# which causes cudaErrorInsufficientDriver errors in child processes. # ── New propagation models (Phase 3.0) ── from app.propagation.base import PropagationModel, PropagationInput, PropagationOutput @@ -546,8 +549,11 @@ class CoverageService: from app.services.gpu_service import gpu_service t_gpu = time.time() - grid_lats = np.array([lat for lat, lon in grid]) - grid_lons = np.array([lon for lat, lon in grid]) + # Import GPU modules here (main process only) to avoid CUDA context issues in workers + from app.services.gpu_backend import gpu_manager + xp = gpu_manager.get_array_module() + grid_lats = xp.array([lat for lat, lon in grid], dtype=xp.float64) + grid_lons = xp.array([lon for lat, lon in grid], dtype=xp.float64) pre_distances = gpu_service.precompute_distances( grid_lats, grid_lons, site.lat, site.lon @@ -556,6 +562,9 @@ class CoverageService: pre_distances, site.frequency, site.height, environment=getattr(settings, 'environment', 'urban'), ) + gpu_time = time.time() - t_gpu + backend_name = "GPU (CUDA)" if gpu_manager.gpu_available else "CPU (NumPy)" + _clog(f"Precomputed {len(grid)} distances+path_loss on {backend_name} in {gpu_time:.2f}s") # Build lookup dict for point loop precomputed = {} @@ -918,9 +927,12 @@ class CoverageService: await asyncio.sleep(0) from app.services.gpu_service import gpu_service + from app.services.gpu_backend import gpu_manager - grid_lats = np.array([lat for lat, _lon in tile_grid]) - grid_lons = np.array([_lon for _lat, _lon in tile_grid]) + t_gpu = time.time() + xp = 
gpu_manager.get_array_module() + grid_lats = xp.array([lat for lat, _lon in tile_grid], dtype=xp.float64) + grid_lons = xp.array([_lon for _lat, _lon in tile_grid], dtype=xp.float64) pre_distances = gpu_service.precompute_distances( grid_lats, grid_lons, site.lat, site.lon, @@ -929,6 +941,9 @@ class CoverageService: pre_distances, site.frequency, site.height, environment=getattr(settings, 'environment', 'urban'), ) + gpu_time = time.time() - t_gpu + backend_name = "GPU (CUDA)" if gpu_manager.gpu_available else "CPU (NumPy)" + _clog(f"Tile {tile_idx+1}: precomputed {len(tile_grid)} pts on {backend_name} in {gpu_time:.2f}s") precomputed = {} for i, (lat, lon) in enumerate(tile_grid): @@ -1405,14 +1420,18 @@ class CoverageService: lat2: float, lon2: float ) -> float: """Calculate bearing from point 1 to point 2 (degrees)""" - lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2]) + # Use math for scalar operations (faster than numpy/cupy for single values) + lat1_r = math.radians(lat1) + lon1_r = math.radians(lon1) + lat2_r = math.radians(lat2) + lon2_r = math.radians(lon2) - dlon = lon2 - lon1 + dlon = lon2_r - lon1_r - x = np.sin(dlon) * np.cos(lat2) - y = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(dlon) + x = math.sin(dlon) * math.cos(lat2_r) + y = math.cos(lat1_r) * math.sin(lat2_r) - math.sin(lat1_r) * math.cos(lat2_r) * math.cos(dlon) - bearing = np.degrees(np.arctan2(x, y)) + bearing = math.degrees(math.atan2(x, y)) return (bearing + 360) % 360 diff --git a/frontend/src/store/coverage.ts b/frontend/src/store/coverage.ts index ede7d10..6f7c951 100644 --- a/frontend/src/store/coverage.ts +++ b/frontend/src/store/coverage.ts @@ -218,6 +218,12 @@ export const useCoverageStore = create((set, get) => ({ setError: (error) => set({ error }), calculateCoverage: async () => { + // Guard against duplicate calculations + if (get().isCalculating) { + console.warn('[Coverage] Calculation already in progress, ignoring duplicate request'); + 
return; + } + const { settings } = get(); const sites = useSitesStore.getState().sites;