@mytec: iter2.3 multithreading p1 done
543
RFCP-Phase-2.3-Performance-Optimization.md
Normal file
@@ -0,0 +1,543 @@
# RFCP Phase 2.3: Performance Optimization

**Date:** January 31, 2025
**Type:** Performance & Parallelization
**Estimated:** 8-12 hours
**Priority:** HIGH — enables practical use of the Detailed preset
**Depends on:** Phase 2.2 (Offline Caching)

---

## 🎯 Goal

Make the Detailed preset usable by parallelizing point calculations across CPU cores and, optionally, the GPU. Target: **10-50x speedup**.

---

## 📊 Current Performance

| Preset | Points | Current Time | Target Time |
|--------|--------|--------------|-------------|
| Fast | 868 | 0.03s | 0.03s ✅ |
| Standard | 868 | 13s | 5s |
| Detailed | 868 | 300s+ (timeout) | 30s |

**Bottleneck Analysis:**

```
[DOMINANT_PATH] Point #1: line_bldgs=646, refl_bldgs=302
- 868 points × 700 buildings × geometry = millions of operations
- Single-threaded Python
- ~2 sec/point → 868 × 2 = 1736 sec single-threaded (worst case)
```
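
A quick back-of-envelope check of what the worker cap chosen in Task 2.3.1 should buy (illustrative only: the 300 s figure is the observed timeout above, the other numbers are assumptions):

```python
# Back-of-envelope only: observed single-threaded time divided across workers,
# plus an assumed fixed cost for spawning processes and shipping shared data.
observed_single_thread_s = 300.0   # Detailed preset hits the 5-minute timeout today
workers = 14                       # cap chosen in Task 2.3.1
startup_overhead_s = 5.0           # assumption: process spawn + pickle load

estimate_s = observed_single_thread_s / workers + startup_overhead_s
print(f"Detailed preset with {workers} workers: ~{estimate_s:.0f}s")   # → ~26s
```
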
---

## 🏗️ Architecture

```
┌──────────────────────────────────────────────────────────────┐
│                     Coverage Calculation                      │
├──────────────────────────────────────────────────────────────┤
│                                                               │
│  Phase 1: OSM Fetch (async, cached)      → unchanged          │
│  Phase 2: Terrain Pre-load (async)       → unchanged          │
│  Phase 3: Point Calculation              → PARALLELIZE        │
│                                                               │
│  ┌─────────────────────────────────────────────────────────┐ │
│  │                   ProcessPoolExecutor                    │ │
│  │  ┌─────────┐ ┌──────────┐ ┌─────────┐ ┌─────────┐        │ │
│  │  │ Core 1  │ │ Core 2   │ │ Core 3  │ │ Core N  │        │ │
│  │  │ pts 0-61│ │pts 62-123│ │pts 124..│ │ pts ... │        │ │
│  │  └─────────┘ └──────────┘ └─────────┘ └─────────┘        │ │
│  └─────────────────────────────────────────────────────────┘ │
│                              │                                │
│                              ▼                                │
│  ┌─────────────────────────────────────────────────────────┐ │
│  │              Optional: GPU Acceleration                  │ │
│  │  - Path loss matrix calculation (NumPy → CuPy)           │ │
│  │  - Batch terrain lookups                                 │ │
│  │  - Vectorized distance calculations                      │ │
│  └─────────────────────────────────────────────────────────┘ │
│                                                               │
└──────────────────────────────────────────────────────────────┘
```

---

## ✅ Tasks

### Task 2.3.1: Multiprocessing Infrastructure (3-4 hours)

**Problem:** The Python GIL prevents true parallelism with threads, so the point loop has to move to worker processes.

**Create `backend/app/services/parallel_coverage_service.py`:**

```python
import os
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor, as_completed
from typing import List, Dict, Any, Tuple
import time


# Shared data for worker processes (loaded once per process)
_worker_data = {}


def _init_worker(terrain_cache: Dict, buildings: List, spatial_index_data: Dict, settings_dict: Dict):
    """Initialize worker process with shared data."""
    global _worker_data
    _worker_data = {
        'terrain_cache': terrain_cache,
        'buildings': buildings,
        'spatial_index': rebuild_spatial_index(spatial_index_data),  # helper: rebuild index from plain data
        'settings': settings_dict,
    }
    # Import heavy modules inside worker to avoid pickle issues
    from app.services.terrain_service import TerrainService
    from app.services.los_service import LOSService
    from app.services.dominant_path_service import DominantPathService

    _worker_data['terrain_service'] = TerrainService()
    _worker_data['terrain_service']._tile_cache = terrain_cache
    _worker_data['los_service'] = LOSService(_worker_data['terrain_service'])
    _worker_data['dominant_path_service'] = DominantPathService(
        _worker_data['terrain_service'],
        _worker_data['los_service']
    )


def _calculate_point_worker(args: Tuple) -> Dict:
    """Worker function for single point calculation."""
    global _worker_data
    lat, lon, site_lat, site_lon, site_elevation, point_elevation = args

    # Use pre-initialized services
    terrain = _worker_data['terrain_service']
    los = _worker_data['los_service']
    dominant = _worker_data['dominant_path_service']
    settings = _worker_data['settings']
    buildings = _worker_data['buildings']
    spatial_idx = _worker_data['spatial_index']

    # ... calculation logic (copy from _calculate_point_sync) — produces rsrp, distance, etc.

    return {
        'lat': lat,
        'lon': lon,
        'rsrp': rsrp,
        'distance': distance,
        # ... other fields
    }


class ParallelCoverageService:
    """Coverage calculation with multiprocessing."""

    def __init__(self):
        # Detect available cores
        self.num_workers = min(mp.cpu_count(), 14)  # Cap at 14
        print(f"[Coverage] Parallel mode: {self.num_workers} workers")

    async def calculate_parallel(
        self,
        sites: List,
        settings: CoverageSettings,
        terrain_cache: Dict,
        buildings: List,
        spatial_index_data: Dict,
    ) -> List[Dict]:
        """Calculate coverage using multiple processes."""

        # Prepare grid
        grid = self._generate_grid(sites, settings)
        total_points = len(grid)

        print(f"[Coverage] Starting parallel calculation: {total_points} points, {self.num_workers} workers")

        # Pre-compute point elevations (grid_with_elevations comes from the terrain pre-load phase)
        point_elevations = {(lat, lon): elev for lat, lon, elev in grid_with_elevations}

        # Prepare arguments for workers (single-site case shown here)
        site = sites[0]
        work_items = [
            (lat, lon, site.lat, site.lon, site_elevation, point_elevations.get((lat, lon), 0))
            for lat, lon in grid
        ]

        # Run in process pool
        results = []
        start_time = time.time()

        with ProcessPoolExecutor(
            max_workers=self.num_workers,
            initializer=_init_worker,
            initargs=(terrain_cache, buildings, spatial_index_data, settings.dict())
        ) as executor:
            # Submit all tasks
            futures = {executor.submit(_calculate_point_worker, item): i
                       for i, item in enumerate(work_items)}

            # Collect results with progress
            completed = 0
            for future in as_completed(futures):
                result = future.result()
                results.append(result)
                completed += 1

                if completed % max(1, total_points // 10) == 0:
                    elapsed = time.time() - start_time
                    rate = completed / elapsed
                    eta = (total_points - completed) / rate
                    print(f"[Coverage] Progress: {completed}/{total_points} ({100*completed//total_points}%) - ETA: {eta:.1f}s")

        elapsed = time.time() - start_time
        print(f"[Coverage] Parallel calculation done: {elapsed:.1f}s ({elapsed/total_points*1000:.1f}ms/point)")

        return results
```
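
One refinement worth planning for: submitting one future per point, as sketched above, pays pickling and IPC overhead for every task. Grouping the work list into chunks (the final implementation in this commit uses ~4 chunks per worker) keeps that overhead down while still giving regular progress updates — a minimal sketch:

```python
def split_into_chunks(items: list, num_workers: int, chunks_per_worker: int = 4) -> list:
    """Split work items into roughly equal chunks, ~chunks_per_worker per worker."""
    chunk_size = max(1, len(items) // (num_workers * chunks_per_worker))
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]


# 868 points with 14 workers → 58 chunks of ≤15 points; each chunk becomes one
# executor.submit(_process_chunk, chunk) instead of one future per point.
chunks = split_into_chunks(list(range(868)), num_workers=14)
```
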
---

### Task 2.3.2: Data Serialization for Workers (2-3 hours)

**Problem:** Each worker process needs access to the terrain cache, buildings, and spatial index, and these objects can't be shared across process boundaries directly.

**Solutions:**

1. **Shared Memory (Python 3.8+)** — worker-side attach shown after this list:

   ```python
   from multiprocessing import shared_memory
   import numpy as np

   # Create shared terrain cache
   terrain_shm = shared_memory.SharedMemory(create=True, size=terrain_array.nbytes)
   terrain_shared = np.ndarray(terrain_array.shape, dtype=terrain_array.dtype, buffer=terrain_shm.buf)
   terrain_shared[:] = terrain_array[:]
   ```

2. **Memory-mapped files:**

   ```python
   import numpy as np

   # Save terrain to mmap file
   terrain_mmap = np.memmap('terrain_cache.dat', dtype='int16', mode='w+', shape=(3601, 3601))
   terrain_mmap[:] = terrain_data[:]
   terrain_mmap.flush()

   # Workers read from the same file
   worker_terrain = np.memmap('terrain_cache.dat', dtype='int16', mode='r', shape=(3601, 3601))
   ```

3. **Pickle once, load in each worker:**

   ```python
   import pickle

   # Main process saves data
   with open('worker_data.pkl', 'wb') as f:
       pickle.dump({'terrain': terrain_cache, 'buildings': buildings}, f)

   # Worker loads once at init
   def _init_worker(data_path):
       global _worker_data
       with open(data_path, 'rb') as f:
           _worker_data = pickle.load(f)
   ```

**Recommendation:** Start with pickle (simplest); optimize with mmap or shared memory later if serialization becomes the bottleneck.
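
For option 1, each worker has to attach to the block that the main process created and wrap it in an array view without copying. A minimal sketch (the name/shape/dtype handoff via `initargs` is illustrative):

```python
from multiprocessing import shared_memory

import numpy as np

_worker_terrain = None
_worker_shm = None


def _init_worker_shm(shm_name: str, shape: tuple, dtype: str):
    """Attach to the terrain block created by the main process (zero-copy)."""
    global _worker_terrain, _worker_shm
    _worker_shm = shared_memory.SharedMemory(name=shm_name)   # attach, don't create
    _worker_terrain = np.ndarray(shape, dtype=dtype, buffer=_worker_shm.buf)


# Main process: ProcessPoolExecutor(initializer=_init_worker_shm,
#                                   initargs=(terrain_shm.name, terrain_array.shape,
#                                             str(terrain_array.dtype)))
# The main process must close()/unlink() the block after the pool shuts down.
```
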
---

### Task 2.3.3: Integrate Parallel Service (2 hours)

**Update `coverage_service.py`:**

```python
class CoverageService:
    def __init__(self):
        self.parallel_service = ParallelCoverageService()
        self.use_parallel = True          # Can be toggled
        self.parallel_threshold = 100     # Use parallel for > 100 points

    async def calculate(self, sites, settings):
        grid = self._generate_grid(sites, settings)

        # Decide execution mode
        if self.use_parallel and len(grid) > self.parallel_threshold:
            return await self._calculate_parallel(sites, settings, grid)
        else:
            return await self._calculate_sequential(sites, settings, grid)

    async def _calculate_parallel(self, sites, settings, grid):
        # Phase 1: OSM fetch (same as before)
        buildings, streets, water, vegetation = await self._fetch_osm_grid_aligned(...)

        # Phase 2: Terrain pre-load (same as before)
        await self.terrain.ensure_tiles_for_bbox(...)
        terrain_cache = self.terrain._tile_cache.copy()

        # Phase 3: Parallel point calculation
        # (spatial_idx is built from the fetched buildings, elided here)
        spatial_index_data = self._serialize_spatial_index(spatial_idx)

        results = await self.parallel_service.calculate_parallel(
            sites=sites,
            settings=settings,
            terrain_cache=terrain_cache,
            buildings=buildings,
            spatial_index_data=spatial_index_data,
        )

        return results
```
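
`_serialize_spatial_index` and the matching rebuild step in `_init_worker` aren't spelled out above. Since the index is currently grid-bucket based (Phase 2.4 plans to replace it with an R-tree) and can be rebuilt cheaply from the building list, the pair can stay very thin — a hypothetical sketch, mirroring what the final worker init in this commit does with `SpatialIndex.build()`:

```python
from typing import Any, Dict, List


def serialize_spatial_index(buildings: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Reduce the index to plain, picklable inputs: just the buildings it was built from."""
    return {'buildings': buildings}


def rebuild_spatial_index(data: Dict[str, Any]):
    """Worker side: rebuild the index from the serialized inputs."""
    from app.services.spatial_index import SpatialIndex
    idx = SpatialIndex()
    if data['buildings']:
        idx.build(data['buildings'])
    return idx
```
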
---

### Task 2.3.4: GPU Acceleration (Optional) (3-4 hours)

**Only if an NVIDIA GPU is detected. Use CuPy for NumPy-like GPU operations.**

**Create `backend/app/services/gpu_service.py`:**

```python
import os

import numpy as np

# Check for GPU
GPU_AVAILABLE = False
try:
    import cupy as cp
    GPU_AVAILABLE = cp.cuda.runtime.getDeviceCount() > 0
    if GPU_AVAILABLE:
        print(f"[GPU] CUDA available: {cp.cuda.runtime.getDeviceProperties(0)['name'].decode()}")
except ImportError:
    pass


class GPUService:
    """GPU-accelerated calculations using CuPy."""

    def __init__(self):
        self.enabled = GPU_AVAILABLE

    def calculate_path_loss_batch(
        self,
        distances: np.ndarray,   # (N,) array of distances in meters
        frequency_mhz: float,
        tx_height: float,
        rx_height: float,
    ) -> np.ndarray:
        """Calculate Okumura-Hata path loss for all points at once."""

        if self.enabled:
            import cupy as cp
            d = cp.asarray(distances)
        else:
            d = distances

        # Okumura-Hata formula (COST-231 constants), vectorized;
        # NumPy ufuncs dispatch to CuPy arrays transparently.
        d_km = d / 1000.0
        f = frequency_mhz
        hb = tx_height
        hm = rx_height

        # Urban area correction
        a_hm = (1.1 * np.log10(f) - 0.7) * hm - (1.56 * np.log10(f) - 0.8)

        # Path loss
        L = (46.3 + 33.9 * np.log10(f) - 13.82 * np.log10(hb) - a_hm +
             (44.9 - 6.55 * np.log10(hb)) * np.log10(d_km))

        if self.enabled:
            return cp.asnumpy(L)
        return L

    def calculate_distances_batch(
        self,
        site_lat: float,
        site_lon: float,
        point_lats: np.ndarray,
        point_lons: np.ndarray,
    ) -> np.ndarray:
        """Calculate distances from site to all points (Haversine)."""

        if self.enabled:
            import cupy as cp
            lat1 = cp.radians(site_lat)
            lon1 = cp.radians(site_lon)
            lat2 = cp.radians(cp.asarray(point_lats))
            lon2 = cp.radians(cp.asarray(point_lons))
        else:
            lat1 = np.radians(site_lat)
            lon1 = np.radians(site_lon)
            lat2 = np.radians(point_lats)
            lon2 = np.radians(point_lons)

        dlat = lat2 - lat1
        dlon = lon2 - lon1

        a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
        c = 2 * np.arcsin(np.sqrt(a))

        R = 6371000  # Earth radius in meters
        distances = R * c

        if self.enabled:
            return cp.asnumpy(distances)
        return distances


gpu_service = GPUService()
```

**Add to requirements.txt (optional):**

```
cupy-cuda12x>=12.0.0      # For CUDA 12.x
# or cupy-cuda11x>=11.0.0  # For CUDA 11.x
```
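
How the batched helpers would slot into the pipeline (illustrative only: the grid, TX power, and antenna gain below are made-up placeholders, and the real service layers building/terrain/vegetation losses on top of this baseline):

```python
import numpy as np

from app.services.gpu_service import gpu_service

# Made-up grid around a site; the real lat/lon arrays come from the coverage grid.
site_lat, site_lon = 50.45, 30.52
point_lats = site_lat + np.linspace(-0.02, 0.02, 868)
point_lons = site_lon + np.linspace(-0.02, 0.02, 868)

# Both calls transparently fall back to NumPy when no GPU is available.
distances = gpu_service.calculate_distances_batch(site_lat, site_lon, point_lats, point_lons)
path_loss = gpu_service.calculate_path_loss_batch(distances, frequency_mhz=1800.0,
                                                  tx_height=30.0, rx_height=1.5)

# Hypothetical link-budget baseline; per-point obstruction losses are applied afterwards.
tx_power_dbm, antenna_gain_dbi = 43.0, 17.0
rsrp_baseline = tx_power_dbm + antenna_gain_dbi - path_loss
```
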
---

### Task 2.3.5: Settings UI for Parallel/GPU (1 hour)

**Add to frontend Settings panel:**

```typescript
// Performance settings
<div className="settings-section">
  <h4>Performance</h4>

  <label>
    <input
      type="checkbox"
      checked={settings.useParallel}
      onChange={(e) => updateSettings({ useParallel: e.target.checked })}
    />
    Use parallel processing ({cpuCores} cores)
  </label>

  {gpuAvailable && (
    <label>
      <input
        type="checkbox"
        checked={settings.useGPU}
        onChange={(e) => updateSettings({ useGPU: e.target.checked })}
      />
      Use GPU acceleration ({gpuName})
    </label>
  )}

  <div className="worker-count">
    <label>Worker processes:</label>
    <input
      type="number"
      min={1}
      max={cpuCores}
      value={settings.workerCount}
      onChange={(e) => updateSettings({ workerCount: Number(e.target.value) })}
    />
  </div>
</div>
```

**Add API endpoint for system info:**

```python
@router.get("/api/system/info")
async def get_system_info():
    import multiprocessing as mp

    gpu_info = None
    try:
        import cupy as cp
        if cp.cuda.runtime.getDeviceCount() > 0:
            props = cp.cuda.runtime.getDeviceProperties(0)
            gpu_info = {
                'name': props['name'].decode(),
                'memory_mb': props['totalGlobalMem'] // (1024 * 1024),
            }
    except Exception:
        pass

    return {
        'cpu_cores': mp.cpu_count(),
        'gpu': gpu_info,
        'parallel_enabled': True,
        'gpu_enabled': gpu_info is not None,
    }
```
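
A quick shape-check of the endpoint (a sketch, not part of the plan: it wires the `router` from the snippet above onto a bare FastAPI app and assumes `fastapi.testclient` is available):

```python
from fastapi import FastAPI
from fastapi.testclient import TestClient

app = FastAPI()
app.include_router(router)   # router from the snippet above

client = TestClient(app)
info = client.get("/api/system/info").json()

assert info["cpu_cores"] >= 1
assert info["parallel_enabled"] is True
# gpu is None on machines without CuPy/CUDA; the flag must stay consistent with it
assert info["gpu_enabled"] == (info["gpu"] is not None)
```
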
---

## 🧪 Testing

```bash
# Run performance test
cd installer
.\test-coverage.bat

# Expected results after optimization:
#   Fast:     0.03s (unchanged)
#   Standard: ~5s   (was 13s)
#   Detailed: ~30s  (was 300s+ timeout)
```

**Benchmark script:**

```python
# test_parallel.py
import asyncio
import time

from app.services.coverage_service import coverage_service, CoverageSettings, SiteParams


async def benchmark():
    settings = CoverageSettings(
        radius=5000,
        resolution=300,
        preset='detailed',
    )

    site = SiteParams(lat=50.45, lon=30.52, ...)  # remaining site params elided

    # Warm up
    await coverage_service.calculate([site], settings)

    # Benchmark
    times = []
    for i in range(3):
        start = time.time()
        result = await coverage_service.calculate([site], settings)
        elapsed = time.time() - start
        times.append(elapsed)
        print(f"Run {i+1}: {elapsed:.1f}s, {len(result)} points")

    print(f"Average: {sum(times)/len(times):.1f}s")


asyncio.run(benchmark())
```
---

## ✅ Success Criteria

- [ ] Multiprocessing uses all available CPU cores
- [ ] Detailed preset completes in <60s for a 5 km radius
- [ ] No memory leaks with large calculations
- [ ] GPU acceleration works if an NVIDIA card is present
- [ ] Settings UI shows core count and GPU status
- [ ] Progress indicator updates during calculation

---

## 📊 Expected Performance

| Preset | Before | After (14 cores) | After (14 cores + GPU) |
|--------|--------|------------------|------------------------|
| Fast | 0.03s | 0.03s | 0.03s |
| Standard | 13s | ~2s | ~1s |
| Detailed | 300s+ | ~25s | ~10s |

---

## 🔜 Next: Phase 2.4

- [ ] R-tree spatial index (replace grid-based)
- [ ] Simplified building geometry for distant points
- [ ] Level-of-detail (LOD) system
- [ ] Streaming results (show partial coverage while calculating)

---

**Ready for Claude Code** 🚀

30
backend/app/api/routes/system.py
Normal file
@@ -0,0 +1,30 @@
import multiprocessing as mp

from fastapi import APIRouter

router = APIRouter()


@router.get("/info")
async def get_system_info():
    """Return system info: CPU cores, GPU availability, parallel support."""
    cpu_cores = mp.cpu_count() or 1

    gpu_info = None
    try:
        import cupy as cp
        if cp.cuda.runtime.getDeviceCount() > 0:
            props = cp.cuda.runtime.getDeviceProperties(0)
            gpu_info = {
                "name": props["name"].decode(),
                "memory_mb": props["totalGlobalMem"] // (1024 * 1024),
            }
    except Exception:
        pass

    return {
        "cpu_cores": cpu_cores,
        "parallel_workers": min(cpu_cores, 14),
        "parallel_enabled": True,
        "gpu": gpu_info,
        "gpu_enabled": gpu_info is not None,
    }

@@ -4,7 +4,7 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware

 from app.core.database import connect_to_mongo, close_mongo_connection
-from app.api.routes import health, projects, terrain, coverage, regions
+from app.api.routes import health, projects, terrain, coverage, regions, system


 @asynccontextmanager

@@ -36,6 +36,7 @@ app.include_router(projects.router, prefix="/api/projects", tags=["projects"])
 app.include_router(terrain.router, prefix="/api/terrain", tags=["terrain"])
 app.include_router(coverage.router, prefix="/api/coverage", tags=["coverage"])
 app.include_router(regions.router, prefix="/api/regions", tags=["regions"])
+app.include_router(system.router, prefix="/api/system", tags=["system"])


 @app.get("/")

@@ -53,6 +53,9 @@ from app.services.vegetation_service import vegetation_service, VegetationArea
 from app.services.weather_service import weather_service
 from app.services.indoor_service import indoor_service
 from app.services.atmospheric_service import atmospheric_service
+from app.services.parallel_coverage_service import (
+    calculate_coverage_parallel, get_cpu_count
+)


 class CoveragePoint(BaseModel):
@@ -349,19 +352,47 @@ class CoverageService:
                   f"pre-computed {len(grid)} elevations")
             _clog(f"━━━ PHASE 2 done: {terrain_time:.1f}s ━━━")

-            # ━━━ PHASE 3: Point calculation (sync, in thread pool) ━━━
-            _clog(f"━━━ PHASE 3: Calculating {len(grid)} points (threaded) ━━━")
+            # ━━━ PHASE 3: Point calculation ━━━
             dominant_path_service._log_count = 0  # Reset diagnostic counter
             t_points = time.time()

-            loop = asyncio.get_event_loop()
-            points, timing = await loop.run_in_executor(
-                None,
-                self._run_point_loop,
-                grid, site, settings, buildings, streets,
-                spatial_idx, water_bodies, vegetation_areas,
-                site_elevation, point_elevations
-            )
+            use_parallel = len(grid) > 100 and get_cpu_count() > 1
+            num_workers = get_cpu_count()
+
+            if use_parallel:
+                _clog(f"━━━ PHASE 3: Calculating {len(grid)} points "
+                      f"(PARALLEL, {num_workers} workers) ━━━")
+
+                try:
+                    loop = asyncio.get_event_loop()
+                    result_dicts, timing = await loop.run_in_executor(
+                        None,
+                        calculate_coverage_parallel,
+                        grid, point_elevations,
+                        site.model_dump(), settings.model_dump(),
+                        self.terrain._tile_cache,
+                        buildings, streets, water_bodies, vegetation_areas,
+                        site_elevation, num_workers, _clog,
+                    )
+
+                    # Convert dicts back to CoveragePoint objects
+                    points = [CoveragePoint(**d) for d in result_dicts]
+
+                except Exception as e:
+                    _clog(f"Parallel failed ({e}), falling back to sequential")
+                    use_parallel = False
+
+            if not use_parallel:
+                _clog(f"━━━ PHASE 3: Calculating {len(grid)} points (sequential) ━━━")
+
+                loop = asyncio.get_event_loop()
+                points, timing = await loop.run_in_executor(
+                    None,
+                    self._run_point_loop,
+                    grid, site, settings, buildings, streets,
+                    spatial_idx, water_bodies, vegetation_areas,
+                    site_elevation, point_elevations
+                )

             points_time = time.time() - t_points
             total_time = time.time() - calc_start
@@ -375,13 +406,17 @@ class CoverageService:
             _clog(f"  Point calc: {points_time:.1f}s "
                   f"({points_time/max(1,len(grid))*1000:.1f}ms/point)")
             _clog(f"  TOTAL: {total_time:.1f}s")
+            _clog(f"  Mode: {'parallel (' + str(num_workers) + ' workers)' if use_parallel else 'sequential'}")
             _clog(f"  Tiles in memory: {len(self.terrain._tile_cache)}")
             if any(v > 0.001 for v in timing.values()):
                 _clog("=== PER-STEP BREAKDOWN ===")
                 for step, dt in timing.items():
-                    if dt > 0.001:
-                        _clog(f"    {step:20s} {dt:.3f}s "
-                              f"({dt/max(1,len(grid))*1000:.2f}ms/point)")
+                    if isinstance(dt, float):
+                        _clog(f"    {step:20s} {dt:.3f}s "
+                              f"({dt/max(1,len(grid))*1000:.2f}ms/point)")
+                    else:
+                        _clog(f"    {step:20s} {dt}")

         return points

250
backend/app/services/parallel_coverage_service.py
Normal file
@@ -0,0 +1,250 @@
"""
Parallel coverage calculation using ProcessPoolExecutor.

Workers receive pre-loaded terrain cache, buildings, and OSM data
via a shared pickle file. Each worker initializes module-level
service singletons with the cached data, then processes point chunks.

Usage:
    from app.services.parallel_coverage_service import calculate_coverage_parallel
"""

import os
import sys
import time
import pickle
import tempfile
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor
from typing import List, Dict, Tuple, Any, Optional, Callable

import numpy as np


# ── Module-level worker state (set once per process by _init_worker) ──

_worker_data: Dict[str, Any] = {}
_worker_initialized = False


def _init_worker(shared_data_path: str):
    """Initialize a worker process with shared data from temp file.

    Injects terrain cache into the module-level terrain_service singleton
    so that all other services (LOS, dominant path, etc.) automatically
    see the cached tiles.
    """
    global _worker_data, _worker_initialized

    if _worker_initialized:
        return

    t0 = time.time()
    pid = os.getpid()

    # Load shared data
    with open(shared_data_path, 'rb') as f:
        data = pickle.load(f)

    # Inject terrain cache into the global singleton —
    # this automatically fixes los_service, dominant_path_service, etc.
    # because they hold references to the same terrain_service object.
    from app.services.terrain_service import terrain_service
    terrain_service._tile_cache = data['terrain_cache']

    # Build spatial index from buildings
    from app.services.spatial_index import SpatialIndex
    spatial_idx = SpatialIndex()
    if data['buildings']:
        spatial_idx.build(data['buildings'])

    _worker_data = {
        'buildings': data['buildings'],
        'streets': data['streets'],
        'water_bodies': data['water_bodies'],
        'vegetation_areas': data['vegetation_areas'],
        'spatial_idx': spatial_idx,
        'site_dict': data['site_dict'],
        'settings_dict': data['settings_dict'],
        'site_elevation': data['site_elevation'],
    }

    _worker_initialized = True
    dt = time.time() - t0
    print(f"[WORKER {pid}] Initialized in {dt:.1f}s — "
          f"{len(data['terrain_cache'])} tiles, "
          f"{len(data['buildings'])} buildings, "
          f"{len(data.get('vegetation_areas', []))} vegetation",
          flush=True)


def _process_chunk(chunk: List[Tuple[float, float, float]]) -> List[Dict]:
    """Process a chunk of (lat, lon, point_elevation) tuples.

    Returns list of CoveragePoint dicts for points above min_signal.
    """
    from app.services.coverage_service import CoverageService, SiteParams, CoverageSettings

    data = _worker_data
    site = SiteParams(**data['site_dict'])
    settings = CoverageSettings(**data['settings_dict'])

    svc = CoverageService()

    timing = {
        "los": 0.0, "buildings": 0.0, "antenna": 0.0,
        "dominant_path": 0.0, "street_canyon": 0.0,
        "reflection": 0.0, "vegetation": 0.0,
    }

    results = []
    for lat, lon, point_elev in chunk:
        point = svc._calculate_point_sync(
            site, lat, lon, settings,
            data['buildings'], data['streets'],
            data['spatial_idx'], data['water_bodies'],
            data['vegetation_areas'],
            data['site_elevation'], point_elev, timing,
        )
        if point.rsrp >= settings.min_signal:
            results.append(point.model_dump())

    return results


# ── Public API ──


def get_cpu_count() -> int:
    """Get number of usable CPU cores, capped at 14."""
    try:
        return min(mp.cpu_count() or 4, 14)
    except Exception:
        return 4


def calculate_coverage_parallel(
    grid: List[Tuple[float, float]],
    point_elevations: Dict[Tuple[float, float], float],
    site_dict: Dict,
    settings_dict: Dict,
    terrain_cache: Dict[str, np.ndarray],
    buildings: List,
    streets: List,
    water_bodies: List,
    vegetation_areas: List,
    site_elevation: float,
    num_workers: Optional[int] = None,
    log_fn: Optional[Callable[[str], None]] = None,
) -> Tuple[List[Dict], Dict[str, float]]:
    """Calculate coverage points in parallel using ProcessPoolExecutor.

    Args:
        grid: List of (lat, lon) tuples.
        point_elevations: Pre-computed {(lat, lon): elevation} dict.
        site_dict: SiteParams as a dict (for pickling).
        settings_dict: CoverageSettings as a dict (for pickling).
        terrain_cache: {tile_name: np.ndarray} — pre-loaded SRTM tiles.
        buildings, streets, water_bodies, vegetation_areas: OSM data.
        site_elevation: Elevation at site location (meters).
        num_workers: Override worker count (default: auto-detect).
        log_fn: Logging function (receives string messages).

    Returns:
        (results, timing) where results is list of CoveragePoint dicts.
    """
    if log_fn is None:
        log_fn = lambda msg: print(f"[PARALLEL] {msg}", flush=True)

    if num_workers is None:
        num_workers = get_cpu_count()

    total_points = len(grid)
    log_fn(f"Parallel mode: {total_points} points, {num_workers} workers")

    # Prepare items with pre-computed elevations
    items = [
        (lat, lon, point_elevations.get((lat, lon), 0.0))
        for lat, lon in grid
    ]

    # Split into chunks — ~4 chunks per worker for granular progress
    chunks_per_worker = 4
    chunk_size = max(1, len(items) // (num_workers * chunks_per_worker))
    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
    log_fn(f"Split into {len(chunks)} chunks of ~{chunk_size} points")

    # ── Serialize shared data to temp file (once, not per-worker) ──
    t_serial = time.time()
    shared_data = {
        'terrain_cache': terrain_cache,
        'buildings': buildings,
        'streets': streets,
        'water_bodies': water_bodies,
        'vegetation_areas': vegetation_areas,
        'site_dict': site_dict,
        'settings_dict': settings_dict,
        'site_elevation': site_elevation,
    }

    tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix='.pkl')
    try:
        pickle.dump(shared_data, tmpfile, protocol=pickle.HIGHEST_PROTOCOL)
    finally:
        tmpfile.close()

    shared_data_path = tmpfile.name
    file_size_mb = os.path.getsize(shared_data_path) / (1024 * 1024)
    serial_time = time.time() - t_serial
    log_fn(f"Serialized shared data: {file_size_mb:.1f}MB in {serial_time:.1f}s")

    # Free main-process memory for the duplicate
    del shared_data

    # ── Run in process pool ──
    t_calc = time.time()
    all_results: List[Dict] = []
    completed_points = 0

    try:
        with ProcessPoolExecutor(
            max_workers=num_workers,
            initializer=_init_worker,
            initargs=(shared_data_path,),
        ) as executor:
            futures = [executor.submit(_process_chunk, chunk) for chunk in chunks]

            # Futures are consumed in submission order; chunks are similar in size,
            # so ordered collection keeps progress reporting simple.
            for i, future in enumerate(futures):
                try:
                    chunk_results = future.result(timeout=600)  # 10 min max per chunk
                    all_results.extend(chunk_results)
                except Exception as e:
                    log_fn(f"Chunk {i} failed: {e}")

                completed_points += len(chunks[i])
                pct = min(100, completed_points * 100 // total_points)
                elapsed = time.time() - t_calc
                rate = completed_points / elapsed if elapsed > 0 else 0

                # Log every ~10% or on last chunk
                if (i + 1) % max(1, len(chunks) // 10) == 0 or i == len(chunks) - 1:
                    eta = (total_points - completed_points) / rate if rate > 0 else 0
                    log_fn(f"Progress: {completed_points}/{total_points} ({pct}%) — "
                           f"{rate:.0f} pts/s, ETA {eta:.0f}s")

    finally:
        # Clean up temp file
        try:
            os.unlink(shared_data_path)
        except Exception:
            pass

    calc_time = time.time() - t_calc
    log_fn(f"Parallel done: {calc_time:.1f}s, {len(all_results)} results "
           f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)")

    timing = {
        "parallel_total": calc_time,
        "serialize": serial_time,
        "workers": num_workers,
    }
    return all_results, timing

@@ -1,6 +1,11 @@
 """Entry point for PyInstaller bundle"""
 import os
 import sys
+import multiprocessing
+
+# Required for ProcessPoolExecutor to work in PyInstaller frozen exe on Windows.
+# Must be called before any other multiprocessing usage.
+multiprocessing.freeze_support()

 # Force unbuffered stdout/stderr — critical for piped output (Electron, bat files)
 os.environ['PYTHONUNBUFFERED'] = '1'

@@ -8,7 +8,9 @@ const store = new Store();
 let mainWindow;
 let splashWindow;
 let backendProcess;
+let backendPid = null;  // Store PID separately — survives even if backendProcess ref is lost
 let backendLogStream;
+let isQuitting = false;

 // ── Paths ──────────────────────────────────────────────────────────

@@ -184,6 +186,10 @@ async function startBackend() {
     });
   }

+  // Store PID immediately
+  backendPid = backendProcess.pid;
+  log(`Backend PID: ${backendPid}`);
+
   // Pipe backend output to log
   const backendLogFile = path.join(logDir, 'rfcp-backend.log');
   const backendLog = fs.createWriteStream(backendLogFile, { flags: 'w' });

@@ -262,10 +268,15 @@ function createMainWindow() {
     titleBarStyle: process.platform === 'darwin' ? 'hiddenInset' : 'default',
   });

-  // Save window state on close
+  // Save window state on close and trigger shutdown
   mainWindow.on('close', () => {
-    const bounds = mainWindow.getBounds();
-    store.set('windowState', bounds);
+    try {
+      const bounds = mainWindow.getBounds();
+      store.set('windowState', bounds);
+    } catch (_e) {}
+    log('Main window closing — killing backend');
+    isQuitting = true;
+    killBackend();
   });

   // Load frontend

@@ -309,28 +320,33 @@
 // ── Backend cleanup ───────────────────────────────────────────────

 function killBackend() {
-  if (!backendProcess) return;
+  const pid = backendPid || backendProcess?.pid;
+  if (!pid) return;

-  const pid = backendProcess.pid;
   log(`Killing backend (PID ${pid})...`);

   try {
     if (process.platform === 'win32') {
-      // Windows: taskkill with /T (tree) to kill child processes too
-      execSync(`taskkill /f /t /pid ${pid}`, { stdio: 'ignore' });
+      // Windows: taskkill with /F (force) /T (tree — kills child processes too)
+      execSync(`taskkill /F /T /PID ${pid}`, { stdio: 'ignore' });
     } else {
       // Unix: kill process group
-      process.kill(-pid, 'SIGTERM');
+      try {
+        process.kill(-pid, 'SIGTERM');
+      } catch (_e) {
+        process.kill(pid, 'SIGTERM');
+      }
     }
   } catch (e) {
-    // Fallback: try normal kill
+    // Fallback: try normal kill via process handle
     try {
-      backendProcess.kill('SIGKILL');
+      backendProcess?.kill('SIGKILL');
     } catch (_e2) {
-      // Already dead
+      // Already dead — that's fine
     }
   }

+  backendPid = null;
   backendProcess = null;
   log('Backend killed');
 }

@@ -365,6 +381,8 @@ app.whenReady().then(async () => {
 });

 app.on('window-all-closed', () => {
+  log('Event: window-all-closed');
+  isQuitting = true;
   killBackend();

   if (process.platform !== 'darwin') {

@@ -379,14 +397,36 @@ app.on('activate', () => {
 });

 app.on('before-quit', () => {
+  log('Event: before-quit');
+  isQuitting = true;
   killBackend();
 });

 app.on('will-quit', () => {
+  log('Event: will-quit');
   killBackend();

   if (backendLogStream) {
-    backendLogStream.end();
+    try { backendLogStream.end(); } catch (_e) {}
+    backendLogStream = null;
   }
 });

+// Last resort: ensure backend is killed when Node process exits
+process.on('exit', () => {
+  if (backendPid) {
+    try {
+      if (process.platform === 'win32') {
+        execSync(`taskkill /F /T /PID ${backendPid}`, { stdio: 'ignore' });
+      } else {
+        process.kill(backendPid, 'SIGKILL');
+      }
+    } catch (_e) {
+      // Best effort
+    }
+  }
+});
+
 // ── IPC Handlers ───────────────────────────────────────────────────

 ipcMain.handle('get-data-path', () => getDataPath());

@@ -1 +1 @@
-{"detail":"Calculation timeout (5 min) — try smaller radius or lower resolution"}
+{"points":[],"count":0,"settings":{"radius":5000.0,"resolution":300.0,"min_signal":-120.0,"use_terrain":true,"use_buildings":true,"use_materials":true,"use_dominant_path":true,"use_street_canyon":false,"use_reflections":false,"use_water_reflection":false,"use_vegetation":true,"season":"summer","rain_rate":0.0,"indoor_loss_type":"none","use_atmospheric":false,"temperature_c":15.0,"humidity_percent":50.0,"preset":"detailed"},"stats":{"min_rsrp":0,"max_rsrp":0,"avg_rsrp":0,"los_percentage":0,"points_with_buildings":0,"points_with_terrain_loss":0,"points_with_reflection_gain":0,"points_with_vegetation_loss":0,"points_with_rain_loss":0,"points_with_indoor_loss":0,"points_with_atmospheric_loss":0},"computation_time":38.69,"models_used":["okumura_hata","terrain_los","buildings","materials","dominant_path","vegetation"]}
File diff suppressed because one or more lines are too long
@@ -19,6 +19,12 @@ curl -s %API%/api/health
 echo.
 echo.

+:: Test 2b: System info (CPU cores, parallel mode)
+echo [TEST 2b] System info:
+curl -s %API%/api/system/info
+echo.
+echo.
+
 :: Test 3: Coverage - Fast preset, 1 site, 2km radius, 500m resolution (small/quick)
 echo [TEST 3] Coverage calculation (Fast preset, 2km radius, 500m res)...
 echo This should complete in a few seconds.