@mytec: iter2.5 vectorization start

2026-02-01 13:13:39 +02:00
parent 4026233b21
commit acc90fe538
8 changed files with 747 additions and 71 deletions
--- a/RFCP-Phase-2.5.0-NumPy-Vectorization.md
+++ b/RFCP-Phase-2.5.0-NumPy-Vectorization.md
@@ -0,0 +1,627 @@
+# RFCP Phase 2.5.0: NumPy Vectorization
+
+**Date:** February 1, 2026  
+**Type:** Performance Optimization  
+**Priority:** HIGH  
+**Goal:** 10-50x speedup for Detailed preset via NumPy vectorization
+
+---
+
+## 🎯 Problem
+
+Detailed preset: **346ms/point** — way too slow, causes 5 min timeout.
+
+Root cause: Python loops in dominant_path_service.py
+```python
+for building in buildings:      # 50 iterations
+    for reflector in reflectors:  # 30 iterations
+        # Math operations...
+```
+
+**1500 Python loop iterations** with function calls = slow.
+
+---
+
+## 🚀 Solution: NumPy Vectorization
+
+Replace Python loops with NumPy batch operations:
+- **Before:** 1500 function calls, 1500 loop iterations
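+- **After:** ~10 NumPy calls per path; the only Python loops left are the per-polygon edge tests and the capped LOS check over reflection candidates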
+- **Expected speedup:** 10-50x
+
+---
+
+## 📁 Files to Create/Modify
+
+### NEW: `backend/app/services/geometry_vectorized.py`
+
+Core vectorized geometry functions:
+
+```python
+"""
+Vectorized geometry operations using NumPy.
+All functions operate on arrays, not single values.
+"""
+import numpy as np
+from typing import Tuple, Optional
+
+# Earth radius in meters
+EARTH_RADIUS = 6371000
+
+
+def haversine_batch(
+    lat1: float, lon1: float,
+    lats2: np.ndarray, lons2: np.ndarray
+) -> np.ndarray:
+    """
+    Calculate distances from one point to many points.
+    
+    Args:
+        lat1, lon1: Single origin point (degrees)
+        lats2, lons2: Arrays of destination points (degrees), shape (N,)
+    
+    Returns:
+        distances: Array of distances in meters, shape (N,)
+    """
+    lat1_rad = np.radians(lat1)
+    lon1_rad = np.radians(lon1)
+    lats2_rad = np.radians(lats2)
+    lons2_rad = np.radians(lons2)
+    
+    dlat = lats2_rad - lat1_rad
+    dlon = lons2_rad - lon1_rad
+    
+    a = np.sin(dlat / 2) ** 2 + np.cos(lat1_rad) * np.cos(lats2_rad) * np.sin(dlon / 2) ** 2
+    c = 2 * np.arcsin(np.sqrt(a))
+    
+    return EARTH_RADIUS * c
+
+
+def haversine_matrix(
+    lats1: np.ndarray, lons1: np.ndarray,
+    lats2: np.ndarray, lons2: np.ndarray
+) -> np.ndarray:
+    """
+    Calculate distances between all pairs of points (M×N matrix).
+    
+    Args:
+        lats1, lons1: First set of points, shape (M,)
+        lats2, lons2: Second set of points, shape (N,)
+    
+    Returns:
+        distances: Matrix of distances, shape (M, N)
+    """
+    # Reshape for broadcasting: (M, 1) and (1, N)
+    lats1_rad = np.radians(lats1[:, np.newaxis])
+    lons1_rad = np.radians(lons1[:, np.newaxis])
+    lats2_rad = np.radians(lats2[np.newaxis, :])
+    lons2_rad = np.radians(lons2[np.newaxis, :])
+    
+    dlat = lats2_rad - lats1_rad
+    dlon = lons2_rad - lons1_rad
+    
+    a = np.sin(dlat / 2) ** 2 + np.cos(lats1_rad) * np.cos(lats2_rad) * np.sin(dlon / 2) ** 2
+    c = 2 * np.arcsin(np.sqrt(a))
+    
+    return EARTH_RADIUS * c
+
+
+def points_to_local_coords(
+    ref_lat: float, ref_lon: float,
+    lats: np.ndarray, lons: np.ndarray
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Convert lat/lon to local X/Y coordinates (meters from reference).
+    Uses simple equirectangular projection (good for small areas).
+    
+    Args:
+        ref_lat, ref_lon: Reference point (degrees)
+        lats, lons: Points to convert, shape (N,)
+    
+    Returns:
+        x, y: Local coordinates in meters, shape (N,)
+    """
+    cos_lat = np.cos(np.radians(ref_lat))
+    
+    x = (lons - ref_lon) * 111320 * cos_lat
+    y = (lats - ref_lat) * 110540
+    
+    return x, y
+
+
+def line_segments_intersect_batch(
+    p1: np.ndarray, p2: np.ndarray,
+    segments_start: np.ndarray, segments_end: np.ndarray
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Check if line segment p1→p2 intersects with multiple segments.
+    Uses vectorized cross-product method.
+    
+    Args:
+        p1: Start point (2,) — [x, y]
+        p2: End point (2,) — [x, y]
+        segments_start: Segment start points (N, 2)
+        segments_end: Segment end points (N, 2)
+    
+    Returns:
+        intersects: Boolean array (N,) — True if intersects
+        t_values: Parameter values (N,) — where along p1→p2 intersection occurs
+    """
+    d = p2 - p1  # Direction of main line
+    
+    # Segment directions
+    seg_d = segments_end - segments_start  # (N, 2)
+    
+    # Cross product for parallel check
+    cross = d[0] * seg_d[:, 1] - d[1] * seg_d[:, 0]  # (N,)
+    
+    # Avoid division by zero
+    parallel_mask = np.abs(cross) < 1e-10
+    cross_safe = np.where(parallel_mask, 1.0, cross)
+    
+    # Vector from p1 to segment start (q - p); with this sign t and u below
+    # solve p1 + t*d = segments_start + u*seg_d
+    dp = segments_start - p1  # (N, 2)
+    
+    # Calculate t (parameter on main line) and u (parameter on segments)
+    t = (dp[:, 0] * seg_d[:, 1] - dp[:, 1] * seg_d[:, 0]) / cross_safe
+    u = (dp[:, 0] * d[1] - dp[:, 1] * d[0]) / cross_safe
+    
+    # Intersection if 0 <= t <= 1 and 0 <= u <= 1
+    intersects = ~parallel_mask & (t >= 0) & (t <= 1) & (u >= 0) & (u <= 1)
+    
+    return intersects, t
+
+
+def line_intersects_polygons_batch(
+    p1: np.ndarray, p2: np.ndarray,
+    polygons_x: np.ndarray, polygons_y: np.ndarray,
+    polygon_lengths: np.ndarray
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Check if line p1→p2 intersects multiple polygons.
+    
+    Args:
+        p1: Start point (2,) — [x, y]
+        p2: End point (2,) — [x, y]
+        polygons_x: Flattened polygon X coords (total_vertices,)
+        polygons_y: Flattened polygon Y coords (total_vertices,)
+        polygon_lengths: Number of vertices per polygon (num_polygons,)
+    
+    Returns:
+        intersects: Boolean array (num_polygons,)
+        min_distances: Distance to first intersection (num_polygons,)
+    """
+    num_polygons = len(polygon_lengths)
+    intersects = np.zeros(num_polygons, dtype=bool)
+    min_t = np.ones(num_polygons) * np.inf
+    
+    # Process each polygon
+    idx = 0
+    for i, length in enumerate(polygon_lengths):
+        if length < 3:
+            idx += length
+            continue
+            
+        # Get polygon vertices
+        px = polygons_x[idx:idx + length]
+        py = polygons_y[idx:idx + length]
+        
+        # Create edge segments (including closing edge)
+        starts = np.stack([px, py], axis=1)  # (length, 2)
+        ends = np.stack([np.roll(px, -1), np.roll(py, -1)], axis=1)  # (length, 2)
+        
+        # Check intersections with all edges
+        edge_intersects, t_vals = line_segments_intersect_batch(p1, p2, starts, ends)
+        
+        if np.any(edge_intersects):
+            intersects[i] = True
+            min_t[i] = np.min(t_vals[edge_intersects])
+        
+        idx += length
+    
+    # Convert t to distance
+    line_length = np.linalg.norm(p2 - p1)
+    min_distances = min_t * line_length
+    
+    return intersects, min_distances
+
+
+def calculate_reflection_points_batch(
+    tx: np.ndarray,
+    rx: np.ndarray,
+    wall_starts: np.ndarray,
+    wall_ends: np.ndarray
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Calculate reflection points on multiple walls.
+    Uses mirror image method.
+    
+    Args:
+        tx: Transmitter position (2,) — [x, y]
+        rx: Receiver position (2,) — [x, y]
+        wall_starts: Wall start points (N, 2)
+        wall_ends: Wall end points (N, 2)
+    
+    Returns:
+        reflection_points: Reflection point on each wall (N, 2)
+        valid: Boolean mask for valid reflections (N,)
+    """
+    # Wall vectors and normals
+    wall_vec = wall_ends - wall_starts  # (N, 2)
+    wall_length = np.linalg.norm(wall_vec, axis=1, keepdims=True)
+    wall_unit = wall_vec / np.maximum(wall_length, 1e-10)  # (N, 2)
+    
+    # Normal vectors (perpendicular to wall)
+    normals = np.stack([-wall_unit[:, 1], wall_unit[:, 0]], axis=1)  # (N, 2)
+    
+    # Mirror TX across each wall
+    tx_to_wall = tx - wall_starts  # (N, 2)
+    tx_dist_to_wall = np.sum(tx_to_wall * normals, axis=1, keepdims=True)  # (N, 1)
+    tx_mirror = tx - 2 * tx_dist_to_wall * normals  # (N, 2)
+    
+    # Find intersection of rx→tx_mirror with wall
+    # Parametric: wall_start + t * wall_vec = rx + s * (tx_mirror - rx)
+    rx_to_mirror = tx_mirror - rx  # (N, 2)
+    
+    # Solve for t (position along wall)
+    cross_denom = (rx_to_mirror[:, 0] * wall_vec[:, 1] - 
+                   rx_to_mirror[:, 1] * wall_vec[:, 0])
+    
+    # Avoid division by zero
+    valid_denom = np.abs(cross_denom) > 1e-10
+    cross_denom_safe = np.where(valid_denom, cross_denom, 1.0)
+    
+    rx_to_start = wall_starts - rx  # (N, 2)
+    t = (rx_to_start[:, 0] * rx_to_mirror[:, 1] - 
+         rx_to_start[:, 1] * rx_to_mirror[:, 0]) / cross_denom_safe
+    
+    # Reflection point
+    reflection_points = wall_starts + t[:, np.newaxis] * wall_vec
+    
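+    # Valid if t in [0, 1] and TX and RX lie on the same side of the wall.
+    # Comparing the two signed distances (instead of requiring TX > 0) keeps
+    # the test independent of polygon winding order.
+    rx_dist_to_wall = np.sum((rx - wall_starts) * normals, axis=1)  # (N,)
+    valid = valid_denom & (t >= 0) & (t <= 1) & (tx_dist_to_wall[:, 0] * rx_dist_to_wall > 0)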
+    
+    return reflection_points, valid
+
+
+def find_best_reflection_path_vectorized(
+    tx: np.ndarray,
+    rx: np.ndarray,
+    building_walls_start: np.ndarray,
+    building_walls_end: np.ndarray,
+    wall_to_building: np.ndarray,
+    obstacle_polygons_x: np.ndarray,
+    obstacle_polygons_y: np.ndarray,
+    obstacle_lengths: np.ndarray,
+    max_candidates: int = 50
+) -> Tuple[Optional[np.ndarray], float, float]:
+    """
+    Find best single-reflection path using vectorized operations.
+    
+    Args:
+        tx: Transmitter [x, y]
+        rx: Receiver [x, y]
+        building_walls_start: All wall start points (W, 2)
+        building_walls_end: All wall end points (W, 2)
+        wall_to_building: Mapping wall index → building index (W,)
+        obstacle_*: Obstacle polygons for LOS checks
+        max_candidates: Max reflection candidates to evaluate
+    
+    Returns:
+        best_reflection_point: [x, y] or None
+        best_path_length: Total path length
+        best_reflection_loss: dB loss from reflection
+    """
+    num_walls = len(building_walls_start)
+    
+    if num_walls == 0:
+        return None, np.inf, 0.0
+    
+    # Step 1: Calculate all reflection points at once
+    refl_points, valid = calculate_reflection_points_batch(
+        tx, rx, building_walls_start, building_walls_end
+    )
+    
+    if not np.any(valid):
+        return None, np.inf, 0.0
+    
+    # Step 2: Calculate path lengths for valid reflections
+    valid_indices = np.where(valid)[0]
+    valid_refl = refl_points[valid]  # (V, 2)
+    
+    # TX → reflection distances
+    tx_to_refl = np.linalg.norm(valid_refl - tx, axis=1)  # (V,)
+    
+    # Reflection → RX distances
+    refl_to_rx = np.linalg.norm(rx - valid_refl, axis=1)  # (V,)
+    
+    # Total path lengths
+    path_lengths = tx_to_refl + refl_to_rx  # (V,)
+    
+    # Step 3: Sort by path length, take top candidates
+    if len(valid_indices) > max_candidates:
+        top_indices = np.argpartition(path_lengths, max_candidates)[:max_candidates]
+        valid_indices = valid_indices[top_indices]
+        valid_refl = valid_refl[top_indices]
+        path_lengths = path_lengths[top_indices]
+        tx_to_refl = tx_to_refl[top_indices]
+        refl_to_rx = refl_to_rx[top_indices]
+    
+    # Step 4: Check LOS for each candidate (this is still a loop, but limited)
+    best_idx = -1
+    best_length = np.inf
+    
+    for i, (refl_pt, length) in enumerate(zip(valid_refl, path_lengths)):
+        # Skip if already longer than best
+        if length >= best_length:
+            continue
+        
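+        # Check TX → reflection LOS. When the obstacle set contains the
+        # reflecting building, refl_pt lies exactly on one of its edges, so
+        # stop just short of the wall; otherwise that edge is reported as a
+        # blocker and the candidate is rejected.
+        intersects1, _ = line_intersects_polygons_batch(
+            tx, refl_pt + (tx - refl_pt) * 1e-6,
+            obstacle_polygons_x, obstacle_polygons_y, obstacle_lengths
+        )
+        
+        if np.any(intersects1):
+            continue
+        
+        # Check reflection → RX LOS (start just off the wall, same reason)
+        intersects2, _ = line_intersects_polygons_batch(
+            refl_pt + (rx - refl_pt) * 1e-6, rx,
+            obstacle_polygons_x, obstacle_polygons_y, obstacle_lengths
+        )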
+        
+        if np.any(intersects2):
+            continue
+        
+        # Valid path found
+        best_idx = i
+        best_length = length
+    
+    if best_idx < 0:
+        return None, np.inf, 0.0
+    
+    best_point = valid_refl[best_idx]
+    
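+    # Reflection loss (simplified: 3-10 dB, growing with the detour ratio)
+    # Near-grazing paths (ratio ≈ 1) get the minimum 3 dB; a detour of 1.5x
+    # the direct distance or more is capped at 10 dB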
+    direct_dist = np.linalg.norm(rx - tx)
+    path_ratio = best_length / max(direct_dist, 1.0)
+    reflection_loss = 3.0 + 7.0 * min(1.0, (path_ratio - 1.0) * 2)
+    
+    return best_point, best_length, reflection_loss
+```
+
+---
+
+### MODIFY: `backend/app/services/dominant_path_service.py`
+
+Replace loop-based calculations with vectorized versions:
+
+```python
+# Add imports at top:
+from .geometry_vectorized import (
+    haversine_batch,
+    points_to_local_coords,
+    line_intersects_polygons_batch,
+    find_best_reflection_path_vectorized
+)
+
+# Add helper to convert buildings to numpy arrays:
+def _buildings_to_arrays(buildings: list, ref_lat: float, ref_lon: float):
+    """Convert building list to numpy arrays for vectorized ops."""
+    if not buildings:
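+        # Same 6-tuple shape as the normal return so callers can unpack it
+        # and test len(poly_lengths) == 0
+        return (np.zeros((0, 2)), np.zeros((0, 2)), np.zeros(0, dtype=int),
+                np.zeros(0), np.zeros(0), np.zeros(0, dtype=int))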
+    
+    # Extract centroids
+    lats = np.array([b.get('centroid_lat', b.get('lat', 0)) for b in buildings])
+    lons = np.array([b.get('centroid_lon', b.get('lon', 0)) for b in buildings])
+    
+    # Convert to local coords
+    x, y = points_to_local_coords(ref_lat, ref_lon, lats, lons)
+    
+    # Extract all walls (polygon edges)
+    all_walls_start = []
+    all_walls_end = []
+    wall_to_building = []
+    
+    # Flatten polygons for intersection tests
+    all_poly_x = []
+    all_poly_y = []
+    poly_lengths = []
+    
+    for i, b in enumerate(buildings):
+        coords = b.get('geometry', {}).get('coordinates', [[]])[0]
+        if len(coords) < 3:
+            poly_lengths.append(0)
+            continue
+        
+        # Convert polygon to local coords
+        poly_lats = np.array([c[1] for c in coords])
+        poly_lons = np.array([c[0] for c in coords])
+        px, py = points_to_local_coords(ref_lat, ref_lon, poly_lats, poly_lons)
+        
+        all_poly_x.extend(px)
+        all_poly_y.extend(py)
+        poly_lengths.append(len(coords))
+        
+        # Extract walls
+        for j in range(len(coords) - 1):
+            all_walls_start.append([px[j], py[j]])
+            all_walls_end.append([px[j+1], py[j+1]])
+            wall_to_building.append(i)
+    
+    return (
+        np.array(all_walls_start) if all_walls_start else np.zeros((0, 2)),
+        np.array(all_walls_end) if all_walls_end else np.zeros((0, 2)),
+        np.array(wall_to_building) if wall_to_building else np.zeros(0, dtype=int),
+        np.array(all_poly_x),
+        np.array(all_poly_y),
+        np.array(poly_lengths)
+    )
+
+
+# Update main function to use vectorized operations:
+def find_dominant_path_vectorized(
+    tx_lat: float, tx_lon: float,
+    rx_lat: float, rx_lon: float,
+    buildings: list,
+    frequency_mhz: float = 1800
+) -> dict:
+    """
+    Find dominant propagation path using vectorized NumPy operations.
+    
+    Returns dict with:
+        - has_los: bool
+        - path_type: 'direct' | 'reflection' | 'diffraction'
+        - total_loss: float (dB)
+        - reflection_point: [lat, lon] or None
+    """
+    # Reference point for local coords (midpoint)
+    ref_lat = (tx_lat + rx_lat) / 2
+    ref_lon = (tx_lon + rx_lon) / 2
+    
+    # Convert TX/RX to local coords
+    tx_x, tx_y = points_to_local_coords(ref_lat, ref_lon, 
+                                         np.array([tx_lat]), np.array([tx_lon]))
+    rx_x, rx_y = points_to_local_coords(ref_lat, ref_lon,
+                                         np.array([rx_lat]), np.array([rx_lon]))
+    tx = np.array([tx_x[0], tx_y[0]])
+    rx = np.array([rx_x[0], rx_y[0]])
+    
+    # Convert buildings to arrays
+    (walls_start, walls_end, wall_to_bldg,
+     poly_x, poly_y, poly_lengths) = _buildings_to_arrays(buildings, ref_lat, ref_lon)
+    
+    direct_dist = np.linalg.norm(rx - tx)
+    
+    # Step 1: Check direct LOS
+    if len(poly_lengths) == 0:
+        # No buildings, direct LOS
+        return {
+            'has_los': True,
+            'path_type': 'direct',
+            'total_loss': 0.0,
+            'reflection_point': None,
+            'path_length': direct_dist
+        }
+    
+    intersects, _ = line_intersects_polygons_batch(tx, rx, poly_x, poly_y, poly_lengths)
+    
+    if not np.any(intersects):
+        # Direct LOS exists
+        return {
+            'has_los': True,
+            'path_type': 'direct',
+            'total_loss': 0.0,
+            'reflection_point': None,
+            'path_length': direct_dist
+        }
+    
+    # Step 2: Find best reflection path
+    refl_point, refl_length, refl_loss = find_best_reflection_path_vectorized(
+        tx, rx, walls_start, walls_end, wall_to_bldg,
+        poly_x, poly_y, poly_lengths,
+        max_candidates=50
+    )
+    
+    if refl_point is not None:
+        # Convert reflection point back to lat/lon
+        refl_lat = ref_lat + refl_point[1] / 110540
+        refl_lon = ref_lon + refl_point[0] / (111320 * np.cos(np.radians(ref_lat)))
+        
+        return {
+            'has_los': False,
+            'path_type': 'reflection',
+            'total_loss': refl_loss,
+            'reflection_point': [refl_lat, refl_lon],
+            'path_length': refl_length
+        }
+    
+    # Step 3: Fallback to diffraction (simplified)
+    # Count blocking buildings for diffraction loss estimate
+    num_blocking = np.sum(intersects)
+    diffraction_loss = 10 + 5 * min(num_blocking, 5)  # 10-35 dB
+    
+    return {
+        'has_los': False,
+        'path_type': 'diffraction',
+        'total_loss': diffraction_loss,
+        'reflection_point': None,
+        'path_length': direct_dist  # Approximate
+    }
+```
+
+---
+
+### MODIFY: `backend/app/services/coverage_service.py`
+
+Update `_calculate_point_sync()` to use vectorized dominant path:
+
+```python
+# In _calculate_point_sync(), replace dominant_path call:
+
+if use_dominant_path and buildings:
+    from .dominant_path_service import find_dominant_path_vectorized
+    
+    dominant = find_dominant_path_vectorized(
+        tx_lat=site['lat'],
+        tx_lon=site['lon'],
+        rx_lat=point_lat,
+        rx_lon=point_lon,
+        buildings=buildings,
+        frequency_mhz=site.get('frequency', 1800)
+    )
+    
+    if dominant['path_type'] == 'reflection':
+        reflection_gain = max(0, 10 - dominant['total_loss'])  # Convert loss to gain
+        building_loss = 0  # Reflection path avoids buildings
+    elif dominant['path_type'] == 'diffraction':
+        building_loss = dominant['total_loss']
+        reflection_gain = 0
+    else:
+        # Direct LOS
+        building_loss = 0
+        reflection_gain = 0
+```
+
+---
+
+## 🧪 Testing
+
+```bash
+# Run test script
+.\test-coverage.bat
+
+# Expected results:
+# Fast: ~0.03s (unchanged)
+# Standard: ~35-40s (unchanged)  
+# Detailed: ~30-60s (was 300s timeout!) ← 5-10x faster
+```
+
+---
+
+## ✅ Success Criteria
+
+| Metric | Before | After |
+|--------|--------|-------|
+| Detailed time | 300s (timeout) | <90s |
+| Detailed ms/point | 346ms | <50ms |
+| Memory peak | ~7GB | ~3-4GB |
+| Accuracy | Baseline | Similar ±2dB |
+
+---
+
+## 📝 Notes
+
+1. **LOS checks still have a small loop** — but limited to the 50 shortest candidate paths (selected with `np.argpartition`, not fully sorted)
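+2. **Building→array conversion** currently runs on every `find_dominant_path_vectorized()` call (i.e. once per point), because the local-coordinate origin is the TX/RX midpoint; caching it is listed under Future Optimizations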
+3. **Local coordinate system** avoids expensive lat/lon math in inner loops
+4. **Reflection model simplified** — uses path ratio for loss estimate
+
+---
+
+## 🔜 Future Optimizations
+
+If still slow after vectorization:
+- Cache building arrays (don't reconvert every point)
+- Use scipy.spatial.cKDTree for spatial queries
+- GPU acceleration with CuPy (drop-in NumPy replacement)
--- a/backend/app/services/dominant_path_service.py
+++ b/backend/app/services/dominant_path_service.py
@@ -21,9 +21,9 @@ class RayPath:
    is_valid: bool  # Does this path exist?


-MAX_BUILDINGS_FOR_LINE = 100
-MAX_BUILDINGS_FOR_REFLECTION = 100
-MAX_DISTANCE_FROM_PATH = 500  # meters
+MAX_BUILDINGS_FOR_LINE = 50
+MAX_BUILDINGS_FOR_REFLECTION = 30
+MAX_DISTANCE_FROM_PATH = 300  # meters


 def _filter_buildings_by_distance(buildings, tx_point, rx_point, max_count=100, max_distance=500):
@@ -485,8 +485,8 @@ class DominantPathService:
        if spatial_idx:
            mid_lat = (tx_lat + rx_lat) / 2
            mid_lon = (tx_lon + rx_lon) / 2
-            # buffer_cells=5 with 0.001° cell ≈ 555m radius
-            reflection_buildings = spatial_idx.query_point(mid_lat, mid_lon, buffer_cells=5)
+            # buffer_cells=3 with 0.001° cell ≈ 333m radius
+            reflection_buildings = spatial_idx.query_point(mid_lat, mid_lon, buffer_cells=3)
        else:
            reflection_buildings = buildings

--- a/backend/app/services/gpu_service.py
+++ b/backend/app/services/gpu_service.py
@@ -21,7 +21,8 @@ cp = None

 try:
    import cupy as _cp
-    if _cp.cuda.runtime.getDeviceCount() > 0:
+    device_count = _cp.cuda.runtime.getDeviceCount()
+    if device_count > 0:
        cp = _cp
        GPU_AVAILABLE = True
        props = _cp.cuda.runtime.getDeviceProperties(0)
@@ -31,10 +32,16 @@ try:
            "cuda_version": _cp.cuda.runtime.runtimeGetVersion(),
        }
        print(f"[GPU] CUDA available: {GPU_INFO['name']} ({GPU_INFO['memory_mb']} MB)", flush=True)
+    else:
+        print("[GPU] No CUDA devices found", flush=True)
 except ImportError:
    print("[GPU] CuPy not installed — using CPU/NumPy", flush=True)
+    print("[GPU]   To enable GPU acceleration, install CuPy:", flush=True)
+    print("[GPU]   For CUDA 12.x:  pip install cupy-cuda12x", flush=True)
+    print("[GPU]   For CUDA 11.x:  pip install cupy-cuda11x", flush=True)
+    print("[GPU]   Check CUDA version: nvidia-smi", flush=True)
 except Exception as e:
-    print(f"[GPU] CUDA check failed: {e} — using CPU/NumPy", flush=True)
+    print(f"[GPU] CuPy error: {e} — GPU acceleration disabled", flush=True)


 # Array module: cupy on GPU, numpy on CPU
--- a/backend/app/services/parallel_coverage_service.py
+++ b/backend/app/services/parallel_coverage_service.py
@@ -23,12 +23,12 @@ Usage:

 import os
 import sys
+import subprocess
 import time
 import threading
 import multiprocessing as mp
 from typing import List, Dict, Tuple, Any, Optional, Callable
 import numpy as np
-import psutil


 # ── Cancellation token ──
@@ -49,42 +49,77 @@ class CancellationToken:

 # ── Worker process cleanup ──

+def _clog(msg: str):
+    """Log with [PARALLEL] prefix."""
+    print(f"[PARALLEL] {msg}", flush=True)
+
+
 def _kill_worker_processes() -> int:
-    """Kill all child processes of the current process.
+    """Kill ALL rfcp-server processes except the current (main) process.

-    Uses psutil to find and terminate/kill child processes that may be
-    orphaned after ProcessPoolExecutor timeout or cancellation.
-    Returns the number of children killed.
+    Uses process NAME matching instead of PID tree because psutil.children()
+    cannot see grandchildren spawned by ProcessPoolExecutor workers.
+    Returns the number of processes killed.
    """
+    my_pid = os.getpid()
+    killed_count = 0
+
+    if sys.platform == 'win32':
        try:
-        current = psutil.Process(os.getpid())
-        children = current.children(recursive=True)
-    except (psutil.NoSuchProcess, psutil.AccessDenied):
-        return 0
-
-    if not children:
-        return 0
-
-    count = len(children)
-
-    # First: graceful terminate
-    for child in children:
+            # List all rfcp-server.exe processes in CSV format
+            result = subprocess.run(
+                ['tasklist', '/FI', 'IMAGENAME eq rfcp-server.exe', '/FO', 'CSV', '/NH'],
+                capture_output=True, text=True, timeout=5,
+            )
+            for line in result.stdout.strip().split('\n'):
+                if 'rfcp-server.exe' not in line:
+                    continue
+                parts = line.split(',')
+                if len(parts) >= 2:
+                    pid_str = parts[1].strip().strip('"')
                    try:
-            child.terminate()
-        except (psutil.NoSuchProcess, psutil.AccessDenied):
+                        pid = int(pid_str)
+                        if pid != my_pid:
+                            subprocess.run(
+                                ['taskkill', '/F', '/PID', str(pid)],
+                                capture_output=True, timeout=5,
+                            )
+                            killed_count += 1
+                            _clog(f"Killed worker PID {pid}")
+                    except (ValueError, subprocess.TimeoutExpired):
                        pass
-
-    # Wait up to 3 seconds for graceful exit
-    gone, alive = psutil.wait_procs(children, timeout=3)
-
-    # Force kill survivors
-    for p in alive:
+        except Exception as e:
+            _clog(f"Kill workers error: {e}")
+            # Fallback: kill ALL rfcp-server.exe
            try:
-            p.kill()
-        except (psutil.NoSuchProcess, psutil.AccessDenied):
+                subprocess.run(
+                    ['taskkill', '/F', '/IM', 'rfcp-server.exe', '/T'],
+                    capture_output=True, timeout=5,
+                )
+            except Exception:
                pass
+    else:
+        # Unix: pgrep + kill
+        try:
+            result = subprocess.run(
+                ['pgrep', '-f', 'rfcp-server'],
+                capture_output=True, text=True, timeout=5,
+            )
+            for pid_str in result.stdout.strip().split('\n'):
+                if not pid_str:
+                    continue
+                try:
+                    pid = int(pid_str)
+                    if pid != my_pid:
+                        os.kill(pid, 9)  # SIGKILL
+                        killed_count += 1
+                        _clog(f"Killed worker PID {pid}")
+                except (ValueError, ProcessLookupError, PermissionError):
+                    pass
+        except Exception as e:
+            _clog(f"Kill workers error: {e}")

-    return count
+    return killed_count


 # ── Try to import Ray ──
@@ -470,7 +505,9 @@ def _calculate_with_process_pool(
    pool = None

    try:
-        pool = ProcessPoolExecutor(max_workers=num_workers)
+        # Use spawn context for clean worker processes
+        ctx = mp.get_context('spawn')
+        pool = ProcessPoolExecutor(max_workers=num_workers, mp_context=ctx)
        futures = {
            pool.submit(
                _pool_worker_process_chunk,
@@ -510,9 +547,12 @@ def _calculate_with_process_pool(
        # CRITICAL: Always cleanup pool and orphaned workers
        if pool:
            pool.shutdown(wait=False, cancel_futures=True)
+        # Give pool time to cleanup gracefully
+        time.sleep(0.5)
+        # Then force kill any survivors by process name
        killed = _kill_worker_processes()
        if killed > 0:
-            log_fn(f"Killed {killed} orphaned worker processes")
+            log_fn(f"Force killed {killed} orphaned workers")

    calc_time = time.time() - t_calc
    log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -11,7 +11,6 @@ requests==2.31.0
 httpx==0.27.0
 aiosqlite>=0.19.0
 sqlalchemy>=2.0.0
-psutil>=5.9.0
 ray[default]>=2.9.0
 # GPU acceleration (optional — install cupy-cuda12x for NVIDIA GPU support)
 # cupy-cuda12x>=13.0.0
--- a/frontend/src/components/map/ElevationLayer.tsx
+++ b/frontend/src/components/map/ElevationLayer.tsx
@@ -8,39 +8,36 @@ interface ElevationLayerProps {
  opacity: number;
 }

-// Terrain color gradient: low = green, mid = yellow/tan, high = brown/white
-const COLOR_STOPS = [
-  { elev: 0, r: 20, g: 100, b: 40 },      // dark green
-  { elev: 100, r: 50, g: 160, b: 60 },     // green
-  { elev: 200, r: 130, g: 200, b: 80 },    // yellow-green
-  { elev: 350, r: 210, g: 190, b: 100 },   // tan
-  { elev: 500, r: 180, g: 140, b: 80 },    // brown
-  { elev: 800, r: 160, g: 120, b: 90 },    // dark brown
-  { elev: 1200, r: 200, g: 190, b: 180 },  // light grey
-  { elev: 2000, r: 240, g: 240, b: 240 },  // near white
+// Color gradient for normalized elevation (0 = lowest local, 1 = highest local)
+// Blue (valleys) → Green → Yellow → Orange → Brown (peaks)
+const GRADIENT_STOPS: [number, number, number][] = [
+  [33, 102, 172],   // 0.0 — deep blue (lowest)
+  [103, 169, 207],  // 0.2 — light blue
+  [145, 207, 96],   // 0.4 — green
+  [254, 224, 139],  // 0.6 — yellow
+  [252, 141, 89],   // 0.8 — orange
+  [215, 48, 39],    // 1.0 — brown/red (highest)
 ];

-function getColorForElevation(elev: number): [number, number, number] {
-  if (elev <= COLOR_STOPS[0].elev) {
-    return [COLOR_STOPS[0].r, COLOR_STOPS[0].g, COLOR_STOPS[0].b];
+function getColorForNormalizedElevation(normalized: number): [number, number, number] {
+  const n = Math.max(0, Math.min(1, normalized));
+  // Map 0-1 to gradient index (0-5)
+  const scaled = n * (GRADIENT_STOPS.length - 1);
+  const idx = Math.floor(scaled);
+  const t = scaled - idx;
+
+  if (idx >= GRADIENT_STOPS.length - 1) {
+    return GRADIENT_STOPS[GRADIENT_STOPS.length - 1];
  }

-  for (let i = 1; i < COLOR_STOPS.length; i++) {
-    if (elev <= COLOR_STOPS[i].elev) {
-      const low = COLOR_STOPS[i - 1];
-      const high = COLOR_STOPS[i];
-      const t = (elev - low.elev) / (high.elev - low.elev);
+  const low = GRADIENT_STOPS[idx];
+  const high = GRADIENT_STOPS[idx + 1];
  return [
-        Math.round(low.r + t * (high.r - low.r)),
-        Math.round(low.g + t * (high.g - low.g)),
-        Math.round(low.b + t * (high.b - low.b)),
+    Math.round(low[0] + t * (high[0] - low[0])),
+    Math.round(low[1] + t * (high[1] - low[1])),
+    Math.round(low[2] + t * (high[2] - low[2])),
  ];
 }
-  }
-
-  const last = COLOR_STOPS[COLOR_STOPS.length - 1];
-  return [last.r, last.g, last.b];
-}

 export default function ElevationLayer({ visible, opacity }: ElevationLayerProps) {
  const map = useMap();
@@ -100,10 +97,16 @@ export default function ElevationLayer({ visible, opacity }: ElevationLayerProps
      const imageData = ctx.createImageData(data.cols, data.rows);
      const pixels = imageData.data;

+      // Use LOCAL min/max for color normalization (not absolute thresholds)
+      const minElev = data.min_elevation;
+      const maxElev = data.max_elevation;
+      const elevRange = maxElev - minElev || 1; // avoid division by zero
+
      for (let row = 0; row < data.rows; row++) {
        for (let col = 0; col < data.cols; col++) {
          const elev = data.grid[row][col];
-          const [r, g, b] = getColorForElevation(elev);
+          const normalized = (elev - minElev) / elevRange;
+          const [r, g, b] = getColorForNormalizedElevation(normalized);
          const idx = (row * data.cols + col) * 4;
          pixels[idx] = r;
          pixels[idx + 1] = g;
--- a/installer/coverage-result-fast.json
+++ b/installer/coverage-result-fast.json
--- a/installer/coverage-result-standard.json
+++ b/installer/coverage-result-standard.json