# RFCP Iteration 3.5.0 — GPU Acceleration & UI Polish
## Overview
Major performance upgrade with GPU acceleration support and UI/UX improvements.
**Key Goals:**
- GPU acceleration for 10-50x speedup
- Fix timeout for large radius calculations
- Terrain Profile Viewer
- UI polish and fixes
---
## Phase 1: Critical Fixes
### 1.1 Timeout Fix for Tiled Calculations
**Problem:** The 5-minute total timeout kills calculations with radius > 20 km even though they are still progressing correctly.
**Solution:**
```python
# backend/app/api/websocket.py
# Per-tile timeout instead of a single total timeout.
TILE_TIMEOUT_SECONDS = 120  # 2 min per tile (generous)
TOTAL_TIMEOUT_SECONDS = 1800  # 30 min max for entire calculation

# Tiled calculations get the full 30-minute budget; small (non-tiled)
# calculations keep the original 5-minute limit.
if is_tiled_calculation(radius):
    timeout = TOTAL_TIMEOUT_SECONDS
else:
    timeout = 300  # 5 min for small calculations
```
**Files:**
- `backend/app/api/websocket.py`
- `backend/app/services/coverage_service.py`
---
### 1.2 Coverage Boundary Fix
**Problem:** White dashed boundary doesn't work correctly for multi-site/tiled calculations.
**Solution:**
1. Fix boundary to use convex hull of ALL points > threshold
2. Add toggle button in legend: "Show Boundary"
3. Default: OFF (less visual clutter)
```typescript
// frontend/src/components/map/HeatmapLegend.tsx
// Add toggle
const [showBoundary, setShowBoundary] = useState(false);
```
```typescript
// frontend/src/components/map/CoverageBoundary.tsx
// Boundary = convex hull of every sample at or above the RSRP threshold.
function calculateBoundary(points: CoveragePoint[], threshold: number) {
  const validPoints = points.filter((p) => p.rsrp >= threshold);
  // A hull needs at least three vertices; fewer means no drawable boundary.
  if (validPoints.length < 3) return null;
  // Any convex-hull algorithm works here (Graham scan, gift wrapping, ...).
  return convexHull(validPoints.map((p) => [p.lat, p.lon]));
}
```
**Files:**
- `frontend/src/components/map/CoverageBoundary.tsx`
- `frontend/src/components/map/HeatmapLegend.tsx`
- `frontend/src/store/settings.ts` (add showBoundary state)
---
## Phase 2: GPU Acceleration
### 2.1 GPU Backend Detection
**Support hierarchy:**
1. NVIDIA CUDA (fastest) — CuPy
2. OpenCL (any GPU) — PyOpenCL
3. CPU fallback — NumPy
```python
# backend/app/services/gpu_backend.py (NEW)
from typing import Tuple, Optional
from enum import Enum


class ComputeBackend(Enum):
    """Compute backends in order of preference."""
    CUDA = "cuda"
    OPENCL = "opencl"
    CPU = "cpu"


class GPUManager:
    """Class-level registry of compute devices and the active backend.

    All state lives on the class; callers use the classmethods directly
    and never instantiate it.
    """

    _instance = None
    _backend: ComputeBackend = ComputeBackend.CPU  # active backend
    _device_name: str = "CPU (NumPy)"  # human-readable label for the UI
    _devices: list = []  # cached result of detect_backends()
    _opencl_device_id: Optional[str] = None  # "platform:device" when OpenCL active

    @staticmethod
    def _cuda_device_name(device_idx: int) -> str:
        """Return the marketing name of CUDA device *device_idx*.

        ``cupy.cuda.Device`` exposes no ``.name`` attribute; the name comes
        from the runtime's device properties (bytes in older CuPy releases).
        """
        import cupy as cp
        name = cp.cuda.runtime.getDeviceProperties(device_idx)["name"]
        return name.decode() if isinstance(name, bytes) else name

    @classmethod
    def detect_backends(cls) -> list:
        """Detect all available compute backends.

        Returns a list of device dicts ({type, id, name, memory, priority})
        sorted by priority: CUDA (1), OpenCL (2), CPU (3). The CPU entry is
        always appended, so the result is never empty.
        """
        backends = []

        # Check NVIDIA CUDA
        try:
            import cupy as cp
            for i in range(cp.cuda.runtime.getDeviceCount()):
                device = cp.cuda.Device(i)
                backends.append({
                    "type": ComputeBackend.CUDA,
                    "id": i,
                    # BUG FIX: was ``device.name`` — cupy Device objects have no
                    # .name, so the AttributeError was swallowed by the except
                    # below and silently disabled CUDA detection entirely.
                    "name": cls._cuda_device_name(i),
                    "memory": device.mem_info[1],  # total memory
                    "priority": 1  # highest
                })
        except Exception:
            # CuPy not installed or CUDA runtime unusable — skip silently.
            pass

        # Check OpenCL (AMD, Intel, NVIDIA)
        try:
            import pyopencl as cl
            for platform in cl.get_platforms():
                for device in platform.get_devices():
                    # Skip if already have CUDA version of same GPU
                    if ComputeBackend.CUDA in [b["type"] for b in backends]:
                        if "NVIDIA" in device.name:
                            continue
                    backends.append({
                        "type": ComputeBackend.OPENCL,
                        "id": f"{platform.name}:{device.name}",
                        "name": device.name,
                        "memory": device.global_mem_size,
                        "priority": 2
                    })
        except Exception:
            # PyOpenCL not installed or no OpenCL runtime — skip silently.
            pass

        # CPU always available
        import multiprocessing
        backends.append({
            "type": ComputeBackend.CPU,
            "id": "cpu",
            "name": f"CPU ({multiprocessing.cpu_count()} cores)",
            "memory": None,
            "priority": 3
        })

        cls._devices = sorted(backends, key=lambda x: x["priority"])
        return cls._devices

    @classmethod
    def get_devices(cls) -> list:
        """Get list of available compute devices (detecting lazily, once)."""
        if not cls._devices:
            cls.detect_backends()
        return cls._devices

    @classmethod
    def set_backend(cls, backend_type: ComputeBackend, device_id: Optional[str] = None):
        """Set active compute backend.

        device_id: CUDA ordinal as a string, or "platform:device" for OpenCL.
        """
        cls._backend = backend_type
        if backend_type == ComputeBackend.CUDA:
            import cupy as cp
            device_idx = int(device_id) if device_id else 0
            cp.cuda.Device(device_idx).use()
            # BUG FIX: was ``cp.cuda.Device(device_idx).name`` (no such attribute).
            cls._device_name = cls._cuda_device_name(device_idx)
        elif backend_type == ComputeBackend.OPENCL:
            # Store for later use in calculations
            cls._opencl_device_id = device_id
            cls._device_name = device_id.split(":")[-1] if device_id else "OpenCL"
        else:
            cls._device_name = "CPU (NumPy)"

    @classmethod
    def get_array_module(cls):
        """Get numpy-compatible array module for current backend
        (cupy for CUDA, numpy otherwise — including OpenCL)."""
        if cls._backend == ComputeBackend.CUDA:
            import cupy as cp
            return cp
        else:
            import numpy as np
            return np

    @classmethod
    def get_status(cls) -> dict:
        """Get current GPU status for UI."""
        return {
            "backend": cls._backend.value,
            "device_name": cls._device_name,
            "available_devices": cls.get_devices()
        }
```
### 2.2 GPU-Accelerated Path Loss Calculation
```python
# backend/app/services/propagation_gpu.py (NEW)
import numpy as np  # BUG FIX: np.ndarray annotations below need numpy in scope

from .gpu_backend import GPUManager, ComputeBackend


def calculate_path_loss_batch_gpu(
    site_lat: float,
    site_lon: float,
    site_height: float,
    points_lat: np.ndarray,  # Can be large array
    points_lon: np.ndarray,
    frequency_mhz: float,
    environment: str = "suburban"
) -> np.ndarray:
    """
    Calculate path loss for ALL points at once using GPU.

    Args:
        site_lat, site_lon: Transmitter location, decimal degrees.
        site_height: Base-station antenna height in meters.
        points_lat, points_lon: Receiver coordinates (equal-length arrays).
        frequency_mhz: Carrier frequency in MHz (Okumura-Hata is specified
            for 150-1500 MHz; no range check is performed here).
        environment: "urban", "suburban" or "rural"; any other value
            silently falls back to the urban base formula.

    Returns array of path loss values in dB — always a host (NumPy) array,
    even when the computation ran on the GPU.
    """
    xp = GPUManager.get_array_module()  # numpy or cupy

    # Transfer to GPU if using CUDA
    if GPUManager._backend == ComputeBackend.CUDA:
        lats = xp.asarray(points_lat)
        lons = xp.asarray(points_lon)
    else:
        lats = points_lat
        lons = points_lon

    # Vectorized distance calculation (Haversine)
    R = 6371000  # Earth radius in meters
    lat1 = xp.radians(site_lat)
    lat2 = xp.radians(lats)
    dlat = lat2 - lat1
    dlon = xp.radians(lons - site_lon)
    a = xp.sin(dlat / 2)**2 + xp.cos(lat1) * xp.cos(lat2) * xp.sin(dlon / 2)**2
    c = 2 * xp.arctan2(xp.sqrt(a), xp.sqrt(1 - a))
    distances_m = R * c
    distances_km = distances_m / 1000.0

    # Avoid log(0) for points at (or extremely near) the site.
    distances_km = xp.maximum(distances_km, 0.01)

    # Okumura-Hata model (vectorized).
    # NOTE(review): the mobile-antenna-height correction term a(hm) is not
    # applied here — confirm that omission is intentional for this tool.
    f = frequency_mhz
    hb = site_height
    # Base formula
    A = 69.55 + 26.16 * xp.log10(f) - 13.82 * xp.log10(hb)
    B = 44.9 - 6.55 * xp.log10(hb)
    path_loss = A + B * xp.log10(distances_km)

    # Environment corrections
    if environment == "urban":
        pass  # Base formula
    elif environment == "suburban":
        path_loss = path_loss - 2 * (xp.log10(f / 28))**2 - 5.4
    elif environment == "rural":
        path_loss = path_loss - 4.78 * (xp.log10(f))**2 + 18.33 * xp.log10(f) - 40.94

    # Transfer back to CPU if needed
    if GPUManager._backend == ComputeBackend.CUDA:
        return path_loss.get()  # cupy -> numpy
    return path_loss
def calculate_rsrp_batch_gpu(
    tx_power_dbm: float,
    antenna_gain_dbi: float,
    cable_loss_db: float,
    path_loss_db: np.ndarray,
    additional_loss_db: np.ndarray = None  # optional; treated as 0 when None
) -> np.ndarray:
    """
    Calculate RSRP for all points at once.

    RSRP = TX Power + Antenna Gain - Cable Loss - Path Loss - Additional Loss

    Returns a host (NumPy) array even when computed on the GPU.
    """
    xp = GPUManager.get_array_module()
    if GPUManager._backend == ComputeBackend.CUDA:
        path_loss = xp.asarray(path_loss_db)
        add_loss = xp.asarray(additional_loss_db) if additional_loss_db is not None else 0
    else:
        path_loss = path_loss_db
        add_loss = additional_loss_db if additional_loss_db is not None else 0

    # EIRP: effective isotropic radiated power leaving the antenna.
    eirp = tx_power_dbm + antenna_gain_dbi - cable_loss_db
    rsrp = eirp - path_loss - add_loss

    if GPUManager._backend == ComputeBackend.CUDA:
        return rsrp.get()
    return rsrp
```
### 2.3 GPU Terrain Interpolation
```python
# backend/app/services/terrain_gpu.py (NEW)
import numpy as np  # BUG FIX: snippet had no imports; annotations need numpy


def interpolate_terrain_batch_gpu(
    terrain_data: np.ndarray,
    terrain_bounds: tuple,  # (min_lat, min_lon, max_lat, max_lon)
    points_lat: np.ndarray,
    points_lon: np.ndarray
) -> np.ndarray:
    """
    Bilinear interpolation of terrain heights for all points.

    On CUDA the terrain grid is uploaded once with ``asarray`` and reused
    for the whole batch (plain device memory — not texture memory).
    Points outside *terrain_bounds* are clamped to the nearest edge cell.

    Returns a host (NumPy) array of heights, one per input point.
    """
    # Local import so this module stays importable without GPU extras.
    from .gpu_backend import GPUManager, ComputeBackend

    xp = GPUManager.get_array_module()
    min_lat, min_lon, max_lat, max_lon = terrain_bounds
    rows, cols = terrain_data.shape

    if GPUManager._backend == ComputeBackend.CUDA:
        # Upload terrain to device memory (cached on GPU for the batch).
        terrain_gpu = xp.asarray(terrain_data)
        lats = xp.asarray(points_lat)
        lons = xp.asarray(points_lon)
    else:
        terrain_gpu = terrain_data
        lats = points_lat
        lons = points_lon

    # Normalize coordinates to [0, 1]
    lat_norm = (lats - min_lat) / (max_lat - min_lat)
    lon_norm = (lons - min_lon) / (max_lon - min_lon)

    # Convert to pixel coordinates
    y = lat_norm * (rows - 1)
    x = lon_norm * (cols - 1)

    # BUG FIX: clamp the continuous coordinates BEFORE taking floor.
    # Previously only x0/y0 were clipped afterwards, so out-of-bounds
    # points produced interpolation weights outside [0, 1] and
    # extrapolated garbage heights. In-range points are unaffected.
    x = xp.clip(x, 0, cols - 1)
    y = xp.clip(y, 0, rows - 1)

    # Bilinear interpolation indices
    x0 = xp.floor(x).astype(int)
    x1 = xp.minimum(x0 + 1, cols - 1)
    y0 = xp.floor(y).astype(int)
    y1 = xp.minimum(y0 + 1, rows - 1)

    # Interpolation weights, now guaranteed in [0, 1]
    wx = x - x0
    wy = y - y0

    # Bilinear interpolation
    heights = (
        terrain_gpu[y0, x0] * (1 - wx) * (1 - wy) +
        terrain_gpu[y0, x1] * wx * (1 - wy) +
        terrain_gpu[y1, x0] * (1 - wx) * wy +
        terrain_gpu[y1, x1] * wx * wy
    )

    if GPUManager._backend == ComputeBackend.CUDA:
        return heights.get()
    return heights
```
### 2.4 API Endpoint for GPU Status
```python
# backend/app/api/gpu.py (NEW)
from typing import Optional

from fastapi import APIRouter, Body

from ..services.gpu_backend import GPUManager, ComputeBackend

router = APIRouter(prefix="/api/gpu", tags=["GPU"])


@router.get("/status")
async def get_gpu_status():
    """Get current GPU acceleration status."""
    return GPUManager.get_status()


@router.get("/devices")
async def get_available_devices():
    """List all available compute devices."""
    return {"devices": GPUManager.get_devices()}


@router.post("/set")
async def set_compute_backend(
    backend: str = Body(...),
    # BUG FIX: was ``device_id: str = None`` — a bare str param is a QUERY
    # parameter, but the frontend (GPUSettings.setBackend) POSTs a JSON body
    # {backend, device_id}. Body(...) makes both read from the JSON body,
    # and Optional matches the None default.
    device_id: Optional[str] = Body(None),
):
    """Set active compute backend.

    Raises ValueError (HTTP 500) if *backend* is not a ComputeBackend value.
    """
    backend_enum = ComputeBackend(backend)
    GPUManager.set_backend(backend_enum, device_id)
    return {"status": "ok", "backend": backend, "device": GPUManager._device_name}
```
### 2.5 Frontend GPU Settings UI
```typescript
// frontend/src/components/panels/GPUSettings.tsx (NEW)
import { useState, useEffect } from 'react';
import { Gpu, Cpu, Zap } from 'lucide-react';
// Shape of one compute device as returned by GET /api/gpu/devices.
interface Device {
  type: 'cuda' | 'opencl' | 'cpu';
  id: string;
  name: string;
  memory: number | null; // bytes of device memory; null for the CPU entry
}
// NOTE(review): the JSX in this component appears to have been stripped
// during extraction (bare `return ;` statements and orphaned text in the
// final return). The hook/fetch logic below is intact; the markup must be
// restored from the original file before this snippet is usable.
export function GPUSettings() {
// Device list from /api/gpu/devices plus the currently active backend/device.
const [devices, setDevices] = useState([]);
const [activeBackend, setActiveBackend] = useState('cpu');
const [activeDevice, setActiveDevice] = useState('');
const [loading, setLoading] = useState(true);
// Load once on mount.
useEffect(() => {
fetchGPUStatus();
}, []);
// Fetches the device list and active status; always clears `loading`
// (no catch — a rejected fetch still propagates after the finally block).
const fetchGPUStatus = async () => {
try {
const res = await fetch('/api/gpu/devices');
const data = await res.json();
setDevices(data.devices);
const status = await fetch('/api/gpu/status');
const statusData = await status.json();
setActiveBackend(statusData.backend);
setActiveDevice(statusData.device_name);
} finally {
setLoading(false);
}
};
// POSTs the chosen backend/device as a JSON body, then re-reads status.
const setBackend = async (type: string, deviceId?: string) => {
await fetch('/api/gpu/set', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ backend: type, device_id: deviceId })
});
fetchGPUStatus();
};
// NOTE(review): the icon elements (presumably <Zap/>, <Gpu/>, <Cpu/> from
// the lucide-react import above) were lost — each branch returns nothing.
const getIcon = (type: string) => {
if (type === 'cuda') return ;
if (type === 'opencl') return ;
return ;
};
// Formats a byte count as "X.Y GB"; empty string for null/0 (CPU entry).
const formatMemory = (bytes: number | null) => {
if (!bytes) return '';
const gb = bytes / 1024 / 1024 / 1024;
return `${gb.toFixed(1)} GB`;
};
// NOTE(review): JSX markup below was lost; only its text content remains.
return (
Compute Acceleration
{/* Current status */}
Active: {activeDevice}
{/* Device list */}
{devices.map((device) => (
))}
{/* Info */}
GPU acceleration provides 10-50x speedup for large calculations.
NVIDIA CUDA is fastest, OpenCL works with AMD/Intel GPUs.
);
}
```
### 2.6 Status Bar GPU Indicator
```typescript
// frontend/src/components/StatusBar.tsx (add to existing or create)
// Add GPU indicator to status bar / toolbar