@mytec: prep for cuda

2026-02-01 10:26:24 +02:00
parent 221000d5b3
commit 7893c57bc9
5 changed files with 144 additions and 13 deletions

app/services/coverage_service.py

@@ -409,15 +409,14 @@ class CoverageService:
         _clog(f" TOTAL: {total_time:.1f}s")
         _clog(f" Mode: {'parallel (' + str(num_workers) + ' workers)' if use_parallel else 'sequential'}")
         _clog(f" Tiles in memory: {len(self.terrain._tile_cache)}")
 
-        if any(v > 0.001 for v in timing.values()):
+        if any(isinstance(v, (int, float)) and v > 0.001 for v in timing.values()):
             _clog("=== PER-STEP BREAKDOWN ===")
             for step, dt in timing.items():
-                if dt > 0.001:
-                    if isinstance(dt, float):
-                        _clog(f" {step:20s} {dt:.3f}s "
-                              f"({dt/max(1,len(grid))*1000:.2f}ms/point)")
-                    else:
-                        _clog(f" {step:20s} {dt}")
+                if isinstance(dt, (int, float)) and dt > 0.001:
+                    _clog(f" {step:20s} {dt:.3f}s "
+                          f"({dt/max(1,len(grid))*1000:.2f}ms/point)")
+                elif not isinstance(dt, (int, float)):
+                    _clog(f" {step:20s} {dt}")
 
         return points
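
The isinstance() guard above exists because the timing dict is no longer all floats: the ProcessPool backend added below returns "workers" as an int and "backend" as a string alongside the per-step floats, and the old bare comparison v > 0.001 raises TypeError on a string. A minimal sketch with illustrative values:

    # Shapes mirror the timing dicts in this commit; the numbers are made up.
    timing = {"los": 1.234, "workers": 6, "backend": "process_pool"}

    # Old code: "process_pool" > 0.001 -> TypeError. New code compares only
    # numeric values and prints non-numeric ones verbatim.
    for step, dt in timing.items():
        if isinstance(dt, (int, float)) and dt > 0.001:
            print(f" {step:20s} {dt:.3f}s")
        elif not isinstance(dt, (int, float)):
            print(f" {step:20s} {dt}")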

app/services/parallel_coverage_service.py

@@ -2,13 +2,19 @@
 Parallel coverage calculation.
 
 Primary backend: Ray (shared-memory object store, zero-copy numpy arrays)
-Fallback: Sequential (single-threaded, no extra dependencies)
+Fallback: ProcessPoolExecutor (4-6 workers to limit memory)
+Last resort: Sequential (single-threaded, no extra dependencies)
 
 Ray advantages over ProcessPoolExecutor:
 - ray.put() stores terrain cache ONCE in shared memory
 - Workers access numpy arrays via zero-copy (no per-worker pickle/copy)
 - Eliminates MemoryError on Detailed preset with large terrain + buildings
 
+ProcessPoolExecutor fallback:
+- Used when Ray is unavailable (e.g. PyInstaller builds)
+- Capped at 6 workers to prevent MemoryError from data pickling
+- Each worker gets a full copy of terrain/buildings (no shared memory)
+
 Usage:
     from app.services.parallel_coverage_service import (
         calculate_coverage_parallel, get_cpu_count, RAY_AVAILABLE,
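
A standalone sketch of the zero-copy pattern the docstring describes; the array size and the remote task are illustrative, not the service's actual API:

    import numpy as np
    import ray

    ray.init(num_cpus=4)

    # Stored ONCE in Ray's shared-memory object store.
    terrain = np.zeros((8192, 8192), dtype=np.float32)  # ~256 MB
    terrain_ref = ray.put(terrain)

    @ray.remote
    def sample_rows(tiles, rows):
        # ObjectRef arguments arrive already resolved; numpy arrays are
        # zero-copy, read-only views onto the store, so no per-task pickle.
        return [float(tiles[r, 0]) for r in rows]

    results = ray.get([sample_rows.remote(terrain_ref, [i]) for i in range(4)])

With ProcessPoolExecutor the same terrain would be pickled into every worker separately, which is what the 6-worker cap below guards against.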
@@ -111,7 +117,7 @@ def get_parallel_backend() -> str:
     """Return which parallel backend is available."""
     if RAY_AVAILABLE:
         return "ray"
-    return "sequential"
+    return "process_pool"
 
 
 def _try_init_ray(num_cpus: int) -> bool:
@@ -184,7 +190,20 @@ def calculate_coverage_parallel(
         except Exception as e:
             log_fn(f"Ray execution failed: {e} — falling back to sequential")
 
-    # Fallback: sequential
+    # Fallback: ProcessPoolExecutor with reduced workers to avoid MemoryError
+    pool_workers = min(num_workers, 6)
+    if pool_workers > 1 and total_points > 100:
+        try:
+            return _calculate_with_process_pool(
+                grid, point_elevations, site_dict, settings_dict,
+                terrain_cache, buildings, streets, water_bodies,
+                vegetation_areas, site_elevation,
+                pool_workers, log_fn,
+            )
+        except Exception as e:
+            log_fn(f"ProcessPool failed: {e} — falling back to sequential")
+
+    # Last resort: sequential
     log_fn(f"Sequential fallback: {total_points} points")
     return _calculate_sequential(
         grid, point_elevations, site_dict, settings_dict,
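
Back-of-envelope for the min(num_workers, 6) cap; the per-worker payload sizes are assumptions for illustration, since each ProcessPool worker receives its own pickled copy of terrain and buildings:

    # Assumed payload pickled into each worker (not measured from this repo).
    terrain_mb, buildings_mb = 800, 200

    for workers in (16, 6):
        copied_gb = workers * (terrain_mb + buildings_mb) / 1024
        print(f"{workers:2d} workers -> ~{copied_gb:.1f} GB of copies")
    # 16 workers -> ~15.6 GB of copies (MemoryError territory)
    #  6 workers -> ~ 5.9 GB of copies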
@@ -287,6 +306,122 @@ def _calculate_with_ray(
     return all_results, timing
 
 
+# ── ProcessPoolExecutor fallback ──
+
+def _pool_worker_process_chunk(args):
+    """Worker function for ProcessPoolExecutor. Processes a chunk of points."""
+    chunk, terrain_cache, buildings, osm_data, config = args
+
+    from app.services.terrain_service import terrain_service
+    terrain_service._tile_cache = terrain_cache
+
+    from app.services.spatial_index import SpatialIndex
+    spatial_idx = SpatialIndex()
+    if buildings:
+        spatial_idx.build(buildings)
+
+    from app.services.coverage_service import CoverageService, SiteParams, CoverageSettings
+    site = SiteParams(**config['site_dict'])
+    settings = CoverageSettings(**config['settings_dict'])
+    svc = CoverageService()
+
+    timing = {
+        "los": 0.0, "buildings": 0.0, "antenna": 0.0,
+        "dominant_path": 0.0, "street_canyon": 0.0,
+        "reflection": 0.0, "vegetation": 0.0,
+    }
+
+    results = []
+    for lat, lon, point_elev in chunk:
+        point = svc._calculate_point_sync(
+            site, lat, lon, settings,
+            buildings, osm_data.get('streets', []),
+            spatial_idx, osm_data.get('water_bodies', []),
+            osm_data.get('vegetation_areas', []),
+            config['site_elevation'], point_elev, timing,
+        )
+        if point.rsrp >= settings.min_signal:
+            results.append(point.model_dump())
+    return results
+
+
+def _calculate_with_process_pool(
+    grid, point_elevations, site_dict, settings_dict,
+    terrain_cache, buildings, streets, water_bodies,
+    vegetation_areas, site_elevation,
+    num_workers, log_fn,
+):
+    """Execute using ProcessPoolExecutor with reduced workers to limit memory."""
+    from concurrent.futures import ProcessPoolExecutor, as_completed
+
+    total_points = len(grid)
+    log_fn(f"ProcessPool mode: {total_points} points, {num_workers} workers")
+
+    items = [
+        (lat, lon, point_elevations.get((lat, lon), 0.0))
+        for lat, lon in grid
+    ]
+
+    # Larger chunks than Ray — fewer workers means bigger chunks
+    chunk_size = max(1, len(items) // (num_workers * 2))
+    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
+    log_fn(f"Submitting {len(chunks)} chunks of ~{chunk_size} points")
+
+    config = {
+        'site_dict': site_dict,
+        'settings_dict': settings_dict,
+        'site_elevation': site_elevation,
+    }
+    osm_data = {
+        'streets': streets,
+        'water_bodies': water_bodies,
+        'vegetation_areas': vegetation_areas,
+    }
+
+    t_calc = time.time()
+    all_results: List[Dict] = []
+
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        futures = {
+            executor.submit(
+                _pool_worker_process_chunk,
+                (chunk, terrain_cache, buildings, osm_data, config),
+            ): i
+            for i, chunk in enumerate(chunks)
+        }
+
+        completed_chunks = 0
+        for future in as_completed(futures):
+            try:
+                chunk_results = future.result()
+                all_results.extend(chunk_results)
+            except Exception as e:
+                log_fn(f"Chunk error: {e}")
+            completed_chunks += 1
+
+            pct = completed_chunks * 100 // len(chunks)
+            elapsed = time.time() - t_calc
+            pts = len(all_results)
+            rate = pts / elapsed if elapsed > 0 else 0
+            eta = (total_points - pts) / rate if rate > 0 else 0
+            log_fn(f"Progress: {completed_chunks}/{len(chunks)} chunks ({pct}%) — "
+                   f"{pts} pts, {rate:.0f} pts/s, ETA {eta:.0f}s")
+
+    calc_time = time.time() - t_calc
+    log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
+           f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)")
+
+    timing = {
+        "parallel_total": calc_time,
+        "workers": num_workers,
+        "backend": "process_pool",
+    }
+    return all_results, timing
 
 
 # ── Sequential fallback ──
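
Worked numbers for the chunk sizing in _calculate_with_process_pool above, assuming a hypothetical 10,000-point grid under the 6-worker cap:

    items = list(range(10_000))  # stand-in for the (lat, lon, elev) tuples
    num_workers = 6
    chunk_size = max(1, len(items) // (num_workers * 2))  # 10_000 // 12 = 833
    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
    print(len(chunks), chunk_size)  # 13 chunks of ~833 points each

Targeting roughly two chunks per worker keeps the pool busy as chunks finish at uneven speeds while keeping per-task pickling overhead down.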