From 6dcc5a19b9cdc435e54c8d7a56e44831f2976bba Mon Sep 17 00:00:00 2001
From: mytec <mytec@eliah.one>
Date: Tue, 3 Feb 2026 23:24:12 +0200
Subject: [PATCH] @mytec: 3.8.0 start, stable w/0 ref+

---
 RFCP-3.8.0-Vectorize-Coverage-Task.md    | 181 +++++++++++++++++++++++
 backend/app/services/coverage_service.py |  37 +++--
 frontend/src/store/coverage.ts           |   6 +
 3 files changed, 215 insertions(+), 9 deletions(-)
 create mode 100644 RFCP-3.8.0-Vectorize-Coverage-Task.md

diff --git a/RFCP-3.8.0-Vectorize-Coverage-Task.md b/RFCP-3.8.0-Vectorize-Coverage-Task.md
new file mode 100644
index 0000000..461a4c2
--- /dev/null
+++ b/RFCP-3.8.0-Vectorize-Coverage-Task.md
@@ -0,0 +1,181 @@
+# RFCP 3.8.0 — Vectorize Per-Point Coverage Calculations
+
+## Context
+
+Iteration 3.7.0 added a GPU precompute step for distances and base path loss (Phase 2.5).
+But Phase 3 (the per-point loop) still runs on the CPU, one point at a time across workers.
+This is where 95% of the time goes on the Full preset (195s for 6,642 points).
+
+Current pipeline:
+```
+Phase 2.5 (GPU, 0.01s): distances + base path_loss → precomputed arrays
+Phase 3 (CPU, 195s): per-point terrain_loss, building_loss, reflections, vegetation
+```
+
+Goal: Vectorize the heavy per-point calculations so GPU handles them in bulk.
+
+## Architecture
+
+The key insight: `_calculate_point_sync` (line ~1127) does these steps per point:
+
+1. **Terrain LOS check** — get elevation profile between site and point, check clearance
+2. **Diffraction loss** — knife-edge based on Fresnel zone clearance  
+3. **Building obstruction** — find buildings between site and point, calculate penetration loss
+4. **Materials penalty** — add loss based on building material type
+5. **Dominant path analysis** — LOS vs reflection vs diffraction
+6. **Street canyon** — check if point is in urban canyon
+7. **Reflections** — find reflection paths off buildings (most expensive!)
+8. **Vegetation loss** — check vegetation between site and point
+9. **Final RSRP** — tx_power - path_loss - terrain_loss - building_loss - veg_loss + gains
+
+## Strategy: Vectorize in Stages
+
+NOT everything can be vectorized equally. Prioritize by time spent:
+
+### Stage 1: Terrain LOS + Diffraction (HIGH IMPACT)
+Currently: For each point, sample ~50-100 elevation values along radial path,
+find min clearance, compute knife-edge diffraction.
+
+**Vectorize**: Create 2D elevation profiles for ALL points at once.
+- All points share the same site location
+- For N points, create N terrain profiles (each M samples)  
+- Compute Fresnel clearance for all profiles vectorized
+- Compute diffraction loss vectorized
+
+```python
+# Instead of per-point:
+for point in grid:
+    profile = get_terrain_profile(site, point, num_samples=50)
+    clearance = min_clearance(profile)
+    loss = diffraction_loss(clearance, freq)
+
+# Vectorized:
+xp = gpu_manager.get_array_module()
+# all_profiles shape: (N_points, M_samples)
+all_profiles = get_terrain_profiles_batch(site, all_points, num_samples=50)
+all_clearances = compute_clearances_batch(all_profiles, site_elev, point_elevs, distances)
+all_terrain_loss = diffraction_loss_batch(all_clearances, freq)
+```
+
+### Stage 2: Building Obstruction (HIGH IMPACT)  
+Currently: For each point, find nearby buildings, check if they obstruct path.
+
+**Vectorize**: Use spatial indexing but batch the geometry checks.
+- Pre-compute building bounding boxes as GPU arrays
+- For each point, the ray-building intersection can be done as a matrix operation
+- Building penetration loss is simple lookup after intersection
+
+NOTE: This is harder to vectorize because each point has a different number of
+nearby buildings. Options:
+- (a) Pad to the max number of buildings per point (wastes memory but simple)
+- (b) Use a sparse representation
+- (c) Keep the loop per-point but use the GPU for the geometry math
+
+Recommend option (c) initially — keep the spatial query on CPU but move 
+the trig/geometry calculations to GPU.
+
+### Stage 3: Reflections (MEDIUM IMPACT, only on Full preset)
+Currently: For each point with buildings, compute reflection paths.
+This is the most complex calculation and hardest to vectorize.
+
+**Approach**: Keep reflections per-point for now, but optimize the inner math 
+with vectorized operations.
+
+### Stage 4: Vegetation Loss (LOW IMPACT)
+Simple lookup — not worth GPU overhead.
+
+## Implementation Plan
+
+### Step 1: Batch terrain profiling
+Add to coverage_service.py a new method:
+```python
+def _batch_terrain_profiles(self, site_lat, site_lon, site_elev,
+                             grid_lats, grid_lons, grid_elevs, 
+                             distances, frequency, num_samples=50):
+    """Compute terrain LOS and diffraction loss for all points at once."""
+    xp = gpu_manager.get_array_module()
+    N = len(grid_lats)
+    
+    # Interpolate terrain profiles for all points
+    # Each profile: site → point, num_samples elevation values
+    # Use terrain tile data directly
+    
+    # Compute Fresnel zone clearance for each profile
+    # Compute knife-edge diffraction loss
+    
+    return terrain_losses  # shape (N,)
+```
+
+### Step 2: Batch building check
+Add method:
+```python
+def _batch_building_obstruction(self, site_lat, site_lon,
+                                  grid_lats, grid_lons, 
+                                  distances, buildings_spatial_index,
+                                  all_buildings):
+    """Compute building loss for all points at once."""
+    # For each point, query spatial index (CPU)
+    # Batch the geometry intersection math (GPU)
+    # Return losses
+    
+    return building_losses  # shape (N,)
+```
+
+### Step 3: Replace _run_point_loop
+Instead of ProcessPool workers, do:
+```python
+# In calculate_coverage, after Phase 2.5:
+terrain_losses = self._batch_terrain_profiles(...)
+building_losses = self._batch_building_obstruction(...)
+
+# Final RSRP is now fully vectorized:
+rsrp = tx_power - precomputed_path_loss - terrain_losses - building_losses - veg_losses
+# + antenna_gains + reflection_gains
+```
+
+### Step 4: Keep worker fallback
+If the GPU is not available, or for very complex calculations (reflections),
+fall back to the existing per-point ProcessPool approach.
+
+## Important Notes
+
+1. **GPU code only in main process** — learned from 3.7.0, never import gpu_manager in workers
+2. **Terrain data access** — terrain tiles are in memory, need efficient sampling for batch profiles
+3. **CuPy ↔ NumPy bridge** — use `cupy.asnumpy(arr)` or `arr.get()` to convert back to CPU (CuPy only; NumPy has neither, so check the active backend first)
+4. **Memory** — 6,642 points × 50 terrain samples = 332,100 float64 values ≈ 2.5 MiB on GPU, no problem
+5. **Accuracy** — results must match existing per-point calculation within 1 dB
+
+## Testing
+
+```powershell
+cd D:\root\rfcp\backend
+pyinstaller ..\installer\rfcp-server-gpu.spec --noconfirm
+.\dist\rfcp-server\rfcp-server.exe
+```
+
+Compare Full preset:
+- Before (3.7.0): ~195s for 6,642 points
+- Target (3.8.0): <30s for same calculation
+- Stretch goal: <10s
+
+Verify accuracy:
+- Run same location with GPU and CPU backend
+- Compare RSRP values — should be within 1 dB
+- Coverage percentages (Excellent/Good/Fair/Weak) should be very close
+
+## What NOT to Change
+
+- Don't modify propagation model math (Okumura-Hata, COST-231, Free-Space formulas)
+- Don't change API endpoints or response format  
+- Don't remove the ProcessPool fallback — keep it for CPU-only mode
+- Don't change OSM fetching or caching
+- Don't modify the frontend
+
+## Success Criteria
+
+- [ ] Full preset completes in <30s (was 195s)
+- [ ] Standard preset completes in <5s (was 7.2s)  
+- [ ] No CuPy errors in worker processes
+- [ ] CPU fallback still works
+- [ ] Results match within 1 dB accuracy
+- [ ] GPU utilization visible in Task Manager during calculation
diff --git a/backend/app/services/coverage_service.py b/backend/app/services/coverage_service.py
index 4035dc3..54d9f0f 100644
--- a/backend/app/services/coverage_service.py
+++ b/backend/app/services/coverage_service.py
@@ -62,6 +62,9 @@ from app.services.parallel_coverage_service import (
     calculate_coverage_parallel, get_cpu_count, get_parallel_backend,
     CancellationToken,
 )
+# NOTE: gpu_manager and gpu_service are imported INSIDE functions that need them,
+# NOT at module level. This prevents worker processes from initializing CuPy/CUDA
+# which causes cudaErrorInsufficientDriver errors in child processes.
 
 # ── New propagation models (Phase 3.0) ──
 from app.propagation.base import PropagationModel, PropagationInput, PropagationOutput
@@ -546,8 +549,11 @@ class CoverageService:
         from app.services.gpu_service import gpu_service
 
         t_gpu = time.time()
-        grid_lats = np.array([lat for lat, lon in grid])
-        grid_lons = np.array([lon for lat, lon in grid])
+        # Import GPU modules here (main process only) to avoid CUDA context issues in workers
+        from app.services.gpu_backend import gpu_manager
+        xp = gpu_manager.get_array_module()
+        grid_lats = xp.array([lat for lat, lon in grid], dtype=xp.float64)
+        grid_lons = xp.array([lon for lat, lon in grid], dtype=xp.float64)
 
         pre_distances = gpu_service.precompute_distances(
             grid_lats, grid_lons, site.lat, site.lon
@@ -556,6 +562,9 @@ class CoverageService:
             pre_distances, site.frequency, site.height,
             environment=getattr(settings, 'environment', 'urban'),
         )
+        gpu_time = time.time() - t_gpu
+        backend_name = "GPU (CUDA)" if gpu_manager.gpu_available else "CPU (NumPy)"
+        _clog(f"Precomputed {len(grid)} distances+path_loss on {backend_name} in {gpu_time:.2f}s")
 
         # Build lookup dict for point loop
         precomputed = {}
@@ -918,9 +927,12 @@ class CoverageService:
             await asyncio.sleep(0)
 
             from app.services.gpu_service import gpu_service
+            from app.services.gpu_backend import gpu_manager
 
-            grid_lats = np.array([lat for lat, _lon in tile_grid])
-            grid_lons = np.array([_lon for _lat, _lon in tile_grid])
+            t_gpu = time.time()
+            xp = gpu_manager.get_array_module()
+            grid_lats = xp.array([lat for lat, _lon in tile_grid], dtype=xp.float64)
+            grid_lons = xp.array([_lon for _lat, _lon in tile_grid], dtype=xp.float64)
 
             pre_distances = gpu_service.precompute_distances(
                 grid_lats, grid_lons, site.lat, site.lon,
@@ -929,6 +941,9 @@ class CoverageService:
                 pre_distances, site.frequency, site.height,
                 environment=getattr(settings, 'environment', 'urban'),
             )
+            gpu_time = time.time() - t_gpu
+            backend_name = "GPU (CUDA)" if gpu_manager.gpu_available else "CPU (NumPy)"
+            _clog(f"Tile {tile_idx+1}: precomputed {len(tile_grid)} pts on {backend_name} in {gpu_time:.2f}s")
 
             precomputed = {}
             for i, (lat, lon) in enumerate(tile_grid):
@@ -1405,14 +1420,18 @@ class CoverageService:
         lat2: float, lon2: float
     ) -> float:
         """Calculate bearing from point 1 to point 2 (degrees)"""
-        lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
+        # Use math for scalar operations (faster than numpy/cupy for single values)
+        lat1_r = math.radians(lat1)
+        lon1_r = math.radians(lon1)
+        lat2_r = math.radians(lat2)
+        lon2_r = math.radians(lon2)
 
-        dlon = lon2 - lon1
+        dlon = lon2_r - lon1_r
 
-        x = np.sin(dlon) * np.cos(lat2)
-        y = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(dlon)
+        x = math.sin(dlon) * math.cos(lat2_r)
+        y = math.cos(lat1_r) * math.sin(lat2_r) - math.sin(lat1_r) * math.cos(lat2_r) * math.cos(dlon)
 
-        bearing = np.degrees(np.arctan2(x, y))
+        bearing = math.degrees(math.atan2(x, y))
 
         return (bearing + 360) % 360
 
diff --git a/frontend/src/store/coverage.ts b/frontend/src/store/coverage.ts
index ede7d10..6f7c951 100644
--- a/frontend/src/store/coverage.ts
+++ b/frontend/src/store/coverage.ts
@@ -218,6 +218,12 @@ export const useCoverageStore = create<CoverageState>((set, get) => ({
   setError: (error) => set({ error }),
 
   calculateCoverage: async () => {
+    // Guard against duplicate calculations
+    if (get().isCalculating) {
+      console.warn('[Coverage] Calculation already in progress, ignoring duplicate request');
+      return;
+    }
+
     const { settings } = get();
     const sites = useSitesStore.getState().sites;