@mytec: prep for cuda

2026-02-01 10:26:24 +02:00
parent 221000d5b3
commit 7893c57bc9
5 changed files with 144 additions and 13 deletions

app/services/coverage_service.py

@@ -409,15 +409,14 @@ class CoverageService:
         _clog(f" TOTAL: {total_time:.1f}s")
         _clog(f" Mode: {'parallel (' + str(num_workers) + ' workers)' if use_parallel else 'sequential'}")
         _clog(f" Tiles in memory: {len(self.terrain._tile_cache)}")
 
-        if any(v > 0.001 for v in timing.values()):
+        if any(isinstance(v, (int, float)) and v > 0.001 for v in timing.values()):
             _clog("=== PER-STEP BREAKDOWN ===")
             for step, dt in timing.items():
-                if dt > 0.001:
-                    if isinstance(dt, float):
-                        _clog(f" {step:20s} {dt:.3f}s "
-                              f"({dt/max(1,len(grid))*1000:.2f}ms/point)")
-                    else:
-                        _clog(f" {step:20s} {dt}")
+                if isinstance(dt, (int, float)) and dt > 0.001:
+                    _clog(f" {step:20s} {dt:.3f}s "
+                          f"({dt/max(1,len(grid))*1000:.2f}ms/point)")
+                elif not isinstance(dt, (int, float)):
+                    _clog(f" {step:20s} {dt}")
 
         return points
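
The isinstance() guard above exists because the timing dict is no longer all floats: the ProcessPool backend added below returns "workers" as an int and "backend" as a string alongside the per-step floats, and the old bare comparison v > 0.001 raises TypeError on a string. A minimal sketch with illustrative values:

    # Shapes mirror the timing dicts in this commit; the numbers are made up.
    timing = {"los": 1.234, "workers": 6, "backend": "process_pool"}

    # Old code: "process_pool" > 0.001 -> TypeError. New code compares only
    # numeric values and prints non-numeric ones verbatim.
    for step, dt in timing.items():
        if isinstance(dt, (int, float)) and dt > 0.001:
            print(f" {step:20s} {dt:.3f}s")
        elif not isinstance(dt, (int, float)):
            print(f" {step:20s} {dt}")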

app/services/parallel_coverage_service.py

@@ -2,13 +2,19 @@
 Parallel coverage calculation.
 
 Primary backend: Ray (shared-memory object store, zero-copy numpy arrays)
-Fallback: Sequential (single-threaded, no extra dependencies)
+Fallback: ProcessPoolExecutor (4-6 workers to limit memory)
+Last resort: Sequential (single-threaded, no extra dependencies)
 
 Ray advantages over ProcessPoolExecutor:
 - ray.put() stores terrain cache ONCE in shared memory
 - Workers access numpy arrays via zero-copy (no per-worker pickle/copy)
 - Eliminates MemoryError on Detailed preset with large terrain + buildings
 
+ProcessPoolExecutor fallback:
+- Used when Ray is unavailable (e.g. PyInstaller builds)
+- Capped at 6 workers to prevent MemoryError from data pickling
+- Each worker gets a full copy of terrain/buildings (no shared memory)
+
 Usage:
     from app.services.parallel_coverage_service import (
         calculate_coverage_parallel, get_cpu_count, RAY_AVAILABLE,
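
A standalone sketch of the zero-copy pattern the docstring describes; the array size and the remote task are illustrative, not the service's actual API:

    import numpy as np
    import ray

    ray.init(num_cpus=4)

    # Stored ONCE in Ray's shared-memory object store.
    terrain = np.zeros((8192, 8192), dtype=np.float32)  # ~256 MB
    terrain_ref = ray.put(terrain)

    @ray.remote
    def sample_rows(tiles, rows):
        # ObjectRef arguments arrive already resolved; numpy arrays are
        # zero-copy, read-only views onto the store, so no per-task pickle.
        return [float(tiles[r, 0]) for r in rows]

    results = ray.get([sample_rows.remote(terrain_ref, [i]) for i in range(4)])

With ProcessPoolExecutor the same terrain would be pickled into every worker separately, which is what the 6-worker cap below guards against.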
@@ -111,7 +117,7 @@ def get_parallel_backend() -> str:
     """Return which parallel backend is available."""
     if RAY_AVAILABLE:
         return "ray"
-    return "sequential"
+    return "process_pool"
 
 
 def _try_init_ray(num_cpus: int) -> bool:
@@ -184,7 +190,20 @@ def calculate_coverage_parallel(
         except Exception as e:
             log_fn(f"Ray execution failed: {e} — falling back to sequential")
 
-    # Fallback: sequential
+    # Fallback: ProcessPoolExecutor with reduced workers to avoid MemoryError
+    pool_workers = min(num_workers, 6)
+    if pool_workers > 1 and total_points > 100:
+        try:
+            return _calculate_with_process_pool(
+                grid, point_elevations, site_dict, settings_dict,
+                terrain_cache, buildings, streets, water_bodies,
+                vegetation_areas, site_elevation,
+                pool_workers, log_fn,
+            )
+        except Exception as e:
+            log_fn(f"ProcessPool failed: {e} — falling back to sequential")
+
+    # Last resort: sequential
     log_fn(f"Sequential fallback: {total_points} points")
     return _calculate_sequential(
         grid, point_elevations, site_dict, settings_dict,
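
Back-of-envelope for the min(num_workers, 6) cap; the per-worker payload sizes are assumptions for illustration, since each ProcessPool worker receives its own pickled copy of terrain and buildings:

    # Assumed payload pickled into each worker (not measured from this repo).
    terrain_mb, buildings_mb = 800, 200

    for workers in (16, 6):
        copied_gb = workers * (terrain_mb + buildings_mb) / 1024
        print(f"{workers:2d} workers -> ~{copied_gb:.1f} GB of copies")
    # 16 workers -> ~15.6 GB of copies (MemoryError territory)
    #  6 workers -> ~ 5.9 GB of copies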
@@ -287,6 +306,122 @@ def _calculate_with_ray(
     return all_results, timing
 
 
+# ── ProcessPoolExecutor fallback ──
+
+def _pool_worker_process_chunk(args):
+    """Worker function for ProcessPoolExecutor. Processes a chunk of points."""
+    chunk, terrain_cache, buildings, osm_data, config = args
+
+    from app.services.terrain_service import terrain_service
+    terrain_service._tile_cache = terrain_cache
+
+    from app.services.spatial_index import SpatialIndex
+    spatial_idx = SpatialIndex()
+    if buildings:
+        spatial_idx.build(buildings)
+
+    from app.services.coverage_service import CoverageService, SiteParams, CoverageSettings
+    site = SiteParams(**config['site_dict'])
+    settings = CoverageSettings(**config['settings_dict'])
+    svc = CoverageService()
+
+    timing = {
+        "los": 0.0, "buildings": 0.0, "antenna": 0.0,
+        "dominant_path": 0.0, "street_canyon": 0.0,
+        "reflection": 0.0, "vegetation": 0.0,
+    }
+
+    results = []
+    for lat, lon, point_elev in chunk:
+        point = svc._calculate_point_sync(
+            site, lat, lon, settings,
+            buildings, osm_data.get('streets', []),
+            spatial_idx, osm_data.get('water_bodies', []),
+            osm_data.get('vegetation_areas', []),
+            config['site_elevation'], point_elev, timing,
+        )
+        if point.rsrp >= settings.min_signal:
+            results.append(point.model_dump())
+    return results
+
+
+def _calculate_with_process_pool(
+    grid, point_elevations, site_dict, settings_dict,
+    terrain_cache, buildings, streets, water_bodies,
+    vegetation_areas, site_elevation,
+    num_workers, log_fn,
+):
+    """Execute using ProcessPoolExecutor with reduced workers to limit memory."""
+    from concurrent.futures import ProcessPoolExecutor, as_completed
+
+    total_points = len(grid)
+    log_fn(f"ProcessPool mode: {total_points} points, {num_workers} workers")
+
+    items = [
+        (lat, lon, point_elevations.get((lat, lon), 0.0))
+        for lat, lon in grid
+    ]
+
+    # Larger chunks than Ray — fewer workers means bigger chunks
+    chunk_size = max(1, len(items) // (num_workers * 2))
+    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
+    log_fn(f"Submitting {len(chunks)} chunks of ~{chunk_size} points")
+
+    config = {
+        'site_dict': site_dict,
+        'settings_dict': settings_dict,
+        'site_elevation': site_elevation,
+    }
+    osm_data = {
+        'streets': streets,
+        'water_bodies': water_bodies,
+        'vegetation_areas': vegetation_areas,
+    }
+
+    t_calc = time.time()
+    all_results: List[Dict] = []
+
+    with ProcessPoolExecutor(max_workers=num_workers) as executor:
+        futures = {
+            executor.submit(
+                _pool_worker_process_chunk,
+                (chunk, terrain_cache, buildings, osm_data, config),
+            ): i
+            for i, chunk in enumerate(chunks)
+        }
+
+        completed_chunks = 0
+        for future in as_completed(futures):
+            try:
+                chunk_results = future.result()
+                all_results.extend(chunk_results)
+            except Exception as e:
+                log_fn(f"Chunk error: {e}")
+            completed_chunks += 1
+
+            pct = completed_chunks * 100 // len(chunks)
+            elapsed = time.time() - t_calc
+            pts = len(all_results)
+            rate = pts / elapsed if elapsed > 0 else 0
+            eta = (total_points - pts) / rate if rate > 0 else 0
+            log_fn(f"Progress: {completed_chunks}/{len(chunks)} chunks ({pct}%) — "
+                   f"{pts} pts, {rate:.0f} pts/s, ETA {eta:.0f}s")
+
+    calc_time = time.time() - t_calc
+    log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
+           f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)")
+
+    timing = {
+        "parallel_total": calc_time,
+        "workers": num_workers,
+        "backend": "process_pool",
+    }
+    return all_results, timing
 
 
 # ── Sequential fallback ──
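
Worked numbers for the chunk sizing in _calculate_with_process_pool above, assuming a hypothetical 10,000-point grid under the 6-worker cap:

    items = list(range(10_000))  # stand-in for the (lat, lon, elev) tuples
    num_workers = 6
    chunk_size = max(1, len(items) // (num_workers * 2))  # 10_000 // 12 = 833
    chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
    print(len(chunks), chunk_size)  # 13 chunks of ~833 points each

Targeting roughly two chunks per worker keeps the pool busy as chunks finish at uneven speeds while keeping per-task pickling overhead down.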