diff --git a/.claude/settings.local.json b/.claude/settings.local.json
index c3582a9..58e66b4 100644
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -30,7 +30,20 @@
"Bash(pip3 install numpy)",
"Bash(echo:*)",
"Bash(find:*)",
- "Bash(node -c:*)"
+ "Bash(node -c:*)",
+ "Bash(curl:*)",
+ "Bash(head -3 python3 -c \"import numpy; print\\(numpy.__file__\\)\")",
+ "Bash(pip3 install:*)",
+ "Bash(apt list:*)",
+ "Bash(dpkg:*)",
+ "Bash(sudo apt-get install:*)",
+ "Bash(docker:*)",
+ "Bash(~/.local/bin/pip install:*)",
+ "Bash(pgrep:*)",
+ "Bash(kill:*)",
+ "Bash(sort:*)",
+ "Bash(journalctl:*)",
+ "Bash(pkill:*)"
]
}
}
diff --git a/RFCP-Iteration-3.5.0-GPU-Acceleration.md b/RFCP-Iteration-3.5.0-GPU-Acceleration.md
new file mode 100644
index 0000000..7d04754
--- /dev/null
+++ b/RFCP-Iteration-3.5.0-GPU-Acceleration.md
@@ -0,0 +1,1096 @@
+# RFCP Iteration 3.5.0 — GPU Acceleration & UI Polish
+
+## Overview
+
+Major performance upgrade with GPU acceleration support and UI/UX improvements.
+
+**Key Goals:**
+- GPU acceleration for 10-50x speedup
+- Fix timeout for large radius calculations
+- Terrain Profile Viewer
+- UI polish and fixes
+
+---
+
+## Phase 1: Critical Fixes
+
+### 1.1 Timeout Fix for Tiled Calculations
+
+**Problem:** 5 minute timeout kills calculations > 20km even though they're working correctly.
+
+**Solution:**
+
+```python
+# backend/app/api/websocket.py
+
+# Per-tile timeout instead of total timeout
+TILE_TIMEOUT_SECONDS = 120 # 2 min per tile (generous)
+TOTAL_TIMEOUT_SECONDS = 1800 # 30 min max for entire calculation
+
+# For tiled calculations:
+if is_tiled_calculation(radius):
+ timeout = TOTAL_TIMEOUT_SECONDS
+else:
+ timeout = 300 # 5 min for small calculations
+```
+
+**Files:**
+- `backend/app/api/websocket.py`
+- `backend/app/services/coverage_service.py`
+
+---
+
+### 1.2 Coverage Boundary Fix
+
+**Problem:** White dashed boundary doesn't work correctly for multi-site/tiled calculations.
+
+**Solution:**
+1. Fix boundary to use convex hull of ALL points > threshold
+2. Add toggle button in legend: "Show Boundary"
+3. Default: OFF (less visual clutter)
+
+```typescript
+// frontend/src/components/map/HeatmapLegend.tsx
+
+// Add toggle
+const [showBoundary, setShowBoundary] = useState(false);
+
+<button onClick={() => setShowBoundary(!showBoundary)}>
+  {showBoundary ? 'Hide Boundary' : 'Show Boundary'}
+</button>
+
+```typescript
+// frontend/src/components/map/CoverageBoundary.tsx
+
+// Calculate convex hull of all points above threshold
+function calculateBoundary(points: CoveragePoint[], threshold: number) {
+ const validPoints = points.filter(p => p.rsrp >= threshold);
+ if (validPoints.length < 3) return null;
+
+ // Use convex hull algorithm (Graham scan or gift wrapping)
+ return convexHull(validPoints.map(p => [p.lat, p.lon]));
+}
+```
+
+**Files:**
+- `frontend/src/components/map/CoverageBoundary.tsx`
+- `frontend/src/components/map/HeatmapLegend.tsx`
+- `frontend/src/store/settings.ts` (add showBoundary state)
+
+---
+
+## Phase 2: GPU Acceleration
+
+### 2.1 GPU Backend Detection
+
+**Support hierarchy:**
+1. NVIDIA CUDA (fastest) — CuPy
+2. OpenCL (any GPU) — PyOpenCL
+3. CPU fallback — NumPy
+
+```python
+# backend/app/services/gpu_backend.py (NEW)
+
+from typing import Tuple, Optional
+from enum import Enum
+
+class ComputeBackend(Enum):
+ CUDA = "cuda"
+ OPENCL = "opencl"
+ CPU = "cpu"
+
+class GPUManager:
+ _instance = None
+ _backend: ComputeBackend = ComputeBackend.CPU
+ _device_name: str = "CPU (NumPy)"
+ _devices: list = []
+
+ @classmethod
+ def detect_backends(cls) -> list:
+ """Detect all available compute backends."""
+ backends = []
+
+ # Check NVIDIA CUDA
+ try:
+ import cupy as cp
+            for i in range(cp.cuda.runtime.getDeviceCount()):
+                # cupy.cuda.Device has no .name attribute; query runtime properties
+                props = cp.cuda.runtime.getDeviceProperties(i)
+                backends.append({
+                    "type": ComputeBackend.CUDA,
+                    "id": i,
+                    "name": props["name"].decode(),
+                    "memory": props["totalGlobalMem"],  # total device memory
+                    "priority": 1  # highest
+                })
+ except Exception:
+ pass
+
+ # Check OpenCL (AMD, Intel, NVIDIA)
+ try:
+ import pyopencl as cl
+ for platform in cl.get_platforms():
+ for device in platform.get_devices():
+ # Skip if already have CUDA version of same GPU
+ if ComputeBackend.CUDA in [b["type"] for b in backends]:
+ if "NVIDIA" in device.name:
+ continue
+
+ backends.append({
+ "type": ComputeBackend.OPENCL,
+ "id": f"{platform.name}:{device.name}",
+ "name": device.name,
+ "memory": device.global_mem_size,
+ "priority": 2
+ })
+ except Exception:
+ pass
+
+ # CPU always available
+ import multiprocessing
+ backends.append({
+ "type": ComputeBackend.CPU,
+ "id": "cpu",
+ "name": f"CPU ({multiprocessing.cpu_count()} cores)",
+ "memory": None,
+ "priority": 3
+ })
+
+ cls._devices = sorted(backends, key=lambda x: x["priority"])
+ return cls._devices
+
+ @classmethod
+ def get_devices(cls) -> list:
+ """Get list of available compute devices."""
+ if not cls._devices:
+ cls.detect_backends()
+ return cls._devices
+
+ @classmethod
+ def set_backend(cls, backend_type: ComputeBackend, device_id: Optional[str] = None):
+ """Set active compute backend."""
+ cls._backend = backend_type
+
+ if backend_type == ComputeBackend.CUDA:
+ import cupy as cp
+            device_idx = int(device_id) if device_id else 0
+            cp.cuda.Device(device_idx).use()
+            cls._device_name = cp.cuda.runtime.getDeviceProperties(device_idx)["name"].decode()
+
+ elif backend_type == ComputeBackend.OPENCL:
+ # Store for later use in calculations
+ cls._opencl_device_id = device_id
+ cls._device_name = device_id.split(":")[-1] if device_id else "OpenCL"
+
+ else:
+ cls._device_name = "CPU (NumPy)"
+
+ @classmethod
+ def get_array_module(cls):
+ """Get numpy-compatible array module for current backend."""
+ if cls._backend == ComputeBackend.CUDA:
+ import cupy as cp
+ return cp
+ else:
+ import numpy as np
+ return np
+
+ @classmethod
+ def get_status(cls) -> dict:
+ """Get current GPU status for UI."""
+ return {
+ "backend": cls._backend.value,
+ "device_name": cls._device_name,
+ "available_devices": cls.get_devices()
+ }
+```
+
+### 2.2 GPU-Accelerated Path Loss Calculation
+
+```python
+# backend/app/services/propagation_gpu.py (NEW)
+
+import numpy as np
+
+from .gpu_backend import GPUManager, ComputeBackend
+
+def calculate_path_loss_batch_gpu(
+ site_lat: float,
+ site_lon: float,
+ site_height: float,
+ points_lat: np.ndarray, # Can be large array
+ points_lon: np.ndarray,
+ frequency_mhz: float,
+ environment: str = "suburban"
+) -> np.ndarray:
+ """
+ Calculate path loss for ALL points at once using GPU.
+
+ Returns array of path loss values in dB.
+ """
+ xp = GPUManager.get_array_module() # numpy or cupy
+
+ # Transfer to GPU if using CUDA
+ if GPUManager._backend == ComputeBackend.CUDA:
+ lats = xp.asarray(points_lat)
+ lons = xp.asarray(points_lon)
+ else:
+ lats = points_lat
+ lons = points_lon
+
+ # Vectorized distance calculation (Haversine)
+ R = 6371000 # Earth radius in meters
+ lat1 = xp.radians(site_lat)
+ lat2 = xp.radians(lats)
+ dlat = lat2 - lat1
+ dlon = xp.radians(lons - site_lon)
+
+ a = xp.sin(dlat/2)**2 + xp.cos(lat1) * xp.cos(lat2) * xp.sin(dlon/2)**2
+ c = 2 * xp.arctan2(xp.sqrt(a), xp.sqrt(1-a))
+ distances_m = R * c
+ distances_km = distances_m / 1000.0
+
+ # Avoid log(0)
+ distances_km = xp.maximum(distances_km, 0.01)
+
+ # Okumura-Hata model (vectorized)
+ f = frequency_mhz
+ hb = site_height
+
+ # Base formula
+ A = 69.55 + 26.16 * xp.log10(f) - 13.82 * xp.log10(hb)
+ B = 44.9 - 6.55 * xp.log10(hb)
+
+ path_loss = A + B * xp.log10(distances_km)
+
+ # Environment corrections
+ if environment == "urban":
+ pass # Base formula
+ elif environment == "suburban":
+ path_loss = path_loss - 2 * (xp.log10(f/28))**2 - 5.4
+ elif environment == "rural":
+ path_loss = path_loss - 4.78 * (xp.log10(f))**2 + 18.33 * xp.log10(f) - 40.94
+
+ # Transfer back to CPU if needed
+ if GPUManager._backend == ComputeBackend.CUDA:
+ return path_loss.get() # cupy → numpy
+
+ return path_loss
+
+
+def calculate_rsrp_batch_gpu(
+ tx_power_dbm: float,
+ antenna_gain_dbi: float,
+ cable_loss_db: float,
+ path_loss_db: np.ndarray,
+ additional_loss_db: np.ndarray = None
+) -> np.ndarray:
+ """
+ Calculate RSRP for all points at once.
+
+ RSRP = TX Power + Antenna Gain - Cable Loss - Path Loss - Additional Loss
+ """
+ xp = GPUManager.get_array_module()
+
+ if GPUManager._backend == ComputeBackend.CUDA:
+ path_loss = xp.asarray(path_loss_db)
+ add_loss = xp.asarray(additional_loss_db) if additional_loss_db is not None else 0
+ else:
+ path_loss = path_loss_db
+ add_loss = additional_loss_db if additional_loss_db is not None else 0
+
+ eirp = tx_power_dbm + antenna_gain_dbi - cable_loss_db
+ rsrp = eirp - path_loss - add_loss
+
+ if GPUManager._backend == ComputeBackend.CUDA:
+ return rsrp.get()
+
+ return rsrp
+```
+
+### 2.3 GPU Terrain Interpolation
+
+```python
+# backend/app/services/terrain_gpu.py (NEW)
+
+import numpy as np
+
+from .gpu_backend import GPUManager, ComputeBackend
+
+def interpolate_terrain_batch_gpu(
+ terrain_data: np.ndarray,
+ terrain_bounds: tuple, # (min_lat, min_lon, max_lat, max_lon)
+ points_lat: np.ndarray,
+ points_lon: np.ndarray
+) -> np.ndarray:
+ """
+ Bilinear interpolation of terrain heights for all points.
+
+ GPU version uses texture memory for fast 2D lookups.
+ """
+ xp = GPUManager.get_array_module()
+
+ min_lat, min_lon, max_lat, max_lon = terrain_bounds
+ rows, cols = terrain_data.shape
+
+ if GPUManager._backend == ComputeBackend.CUDA:
+ # Upload terrain as texture (cached on GPU)
+ terrain_gpu = xp.asarray(terrain_data)
+ lats = xp.asarray(points_lat)
+ lons = xp.asarray(points_lon)
+ else:
+ terrain_gpu = terrain_data
+ lats = points_lat
+ lons = points_lon
+
+ # Normalize coordinates to [0, 1]
+ lat_norm = (lats - min_lat) / (max_lat - min_lat)
+ lon_norm = (lons - min_lon) / (max_lon - min_lon)
+
+ # Convert to pixel coordinates
+ y = lat_norm * (rows - 1)
+ x = lon_norm * (cols - 1)
+
+ # Bilinear interpolation indices
+ x0 = xp.floor(x).astype(int)
+ x1 = xp.minimum(x0 + 1, cols - 1)
+ y0 = xp.floor(y).astype(int)
+ y1 = xp.minimum(y0 + 1, rows - 1)
+
+ # Clamp to valid range
+ x0 = xp.clip(x0, 0, cols - 1)
+ y0 = xp.clip(y0, 0, rows - 1)
+
+ # Interpolation weights
+ wx = x - x0
+ wy = y - y0
+
+ # Bilinear interpolation
+ heights = (
+ terrain_gpu[y0, x0] * (1 - wx) * (1 - wy) +
+ terrain_gpu[y0, x1] * wx * (1 - wy) +
+ terrain_gpu[y1, x0] * (1 - wx) * wy +
+ terrain_gpu[y1, x1] * wx * wy
+ )
+
+ if GPUManager._backend == ComputeBackend.CUDA:
+ return heights.get()
+
+ return heights
+```
+
+### 2.4 API Endpoint for GPU Status
+
+```python
+# backend/app/api/gpu.py (NEW)
+
+from fastapi import APIRouter
+from ..services.gpu_backend import GPUManager, ComputeBackend
+
+router = APIRouter(prefix="/api/gpu", tags=["GPU"])
+
+@router.get("/status")
+async def get_gpu_status():
+ """Get current GPU acceleration status."""
+ return GPUManager.get_status()
+
+@router.get("/devices")
+async def get_available_devices():
+ """List all available compute devices."""
+ return {"devices": GPUManager.get_devices()}
+
+from pydantic import BaseModel
+
+class SetBackendRequest(BaseModel):
+    """JSON body for selecting the compute backend."""
+    backend: str
+    device_id: str | None = None
+
+@router.post("/set")
+async def set_compute_backend(req: SetBackendRequest):
+    """Set active compute backend.
+
+    Takes a JSON body: the frontend POSTs JSON, and bare `str` parameters
+    would be interpreted by FastAPI as query parameters (request would 422).
+    """
+    backend_enum = ComputeBackend(req.backend)
+    GPUManager.set_backend(backend_enum, req.device_id)
+    return {"status": "ok", "backend": req.backend, "device": GPUManager._device_name}
+```
+
+### 2.5 Frontend GPU Settings UI
+
+```typescript
+// frontend/src/components/panels/GPUSettings.tsx (NEW)
+
+import { useState, useEffect } from 'react';
+import { Gpu, Cpu, Zap } from 'lucide-react';
+
+interface Device {
+ type: 'cuda' | 'opencl' | 'cpu';
+ id: string;
+ name: string;
+ memory: number | null;
+}
+
+export function GPUSettings() {
+  const [devices, setDevices] = useState<Device[]>([]);
+ const [activeBackend, setActiveBackend] = useState('cpu');
+ const [activeDevice, setActiveDevice] = useState('');
+ const [loading, setLoading] = useState(true);
+
+ useEffect(() => {
+ fetchGPUStatus();
+ }, []);
+
+  const fetchGPUStatus = async () => {
+    try {
+      const res = await fetch('/api/gpu/devices');
+      const data = await res.json();
+      setDevices(data.devices);
+
+      const status = await fetch('/api/gpu/status');
+      const statusData = await status.json();
+      setActiveBackend(statusData.backend);
+      setActiveDevice(statusData.device_name);
+    } catch (e) {
+      // Without a catch, a network failure from the useEffect call site
+      // becomes an unhandled promise rejection.
+      console.error('Failed to fetch GPU status', e);
+    } finally {
+      setLoading(false);
+    }
+  };
+
+ const setBackend = async (type: string, deviceId?: string) => {
+ await fetch('/api/gpu/set', {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({ backend: type, device_id: deviceId })
+ });
+ fetchGPUStatus();
+ };
+
+  const getIcon = (type: string) => {
+    if (type === 'cuda') return <Zap size={16} />;
+    if (type === 'opencl') return <Gpu size={16} />;
+    return <Cpu size={16} />;
+  };
+
+ const formatMemory = (bytes: number | null) => {
+ if (!bytes) return '';
+ const gb = bytes / 1024 / 1024 / 1024;
+ return `${gb.toFixed(1)} GB`;
+ };
+
+  return (
+    <div className="gpu-settings">
+      <h3>
+        <Zap size={16} />
+        Compute Acceleration
+      </h3>
+
+      {/* Current status */}
+      <div className="gpu-status">
+        <span>Active:</span>
+        <strong>{activeDevice}</strong>
+      </div>
+
+      {/* Device list */}
+      <div className="gpu-devices">
+        {devices.map((device) => (
+          <button
+            key={device.id}
+            className={device.type === activeBackend ? 'active' : ''}
+            onClick={() => setBackend(device.type, device.id)}
+          >
+            {getIcon(device.type)}
+            <span>{device.name}</span>
+            <span>{formatMemory(device.memory)}</span>
+          </button>
+        ))}
+      </div>
+
+      {/* Info */}
+      <p className="gpu-info">
+        GPU acceleration provides 10-50x speedup for large calculations.
+        NVIDIA CUDA is fastest, OpenCL works with AMD/Intel GPUs.
+      </p>
+    </div>
+  );
+}
+```
+
+### 2.6 Status Bar GPU Indicator
+
+```typescript
+// frontend/src/components/StatusBar.tsx (add to existing or create)
+
+// Add GPU indicator to status bar / toolbar
+