@mytec: iter3.4.0 start

This commit is contained in:
2026-02-02 21:30:00 +02:00
parent 7f0b4d2269
commit 867ee3d0f4
29 changed files with 1386 additions and 324 deletions

View File

@@ -8,7 +8,6 @@ progress updates during computation phases.
import time
import asyncio
import logging
import threading
from typing import Optional
from fastapi import WebSocket, WebSocketDisconnect
@@ -51,7 +50,7 @@ class ConnectionManager:
"data": result,
})
except Exception as e:
logger.debug(f"[WS] send_result failed: {e}")
logger.warning(f"[WS] send_result failed: {e}")
async def send_error(self, ws: WebSocket, calc_id: str, error: str):
try:
@@ -61,7 +60,7 @@ class ConnectionManager:
"message": error,
})
except Exception as e:
logger.debug(f"[WS] send_error failed: {e}")
logger.warning(f"[WS] send_error failed: {e}")
ws_manager = ConnectionManager()
@@ -74,14 +73,32 @@ async def _run_calculation(ws: WebSocket, calc_id: str, data: dict):
# Shared progress state — written by worker threads, polled by event loop.
# Python GIL makes dict value assignment atomic for simple types.
_progress = {"phase": "Initializing", "pct": 0.05, "seq": 0}
_progress = {"phase": "Initializing", "pct": 0.0, "seq": 0}
_done = False
# Get event loop for cross-thread scheduling of WS sends.
loop = asyncio.get_running_loop()
_last_direct_pct = 0.0
_last_direct_phase = ""
def sync_progress_fn(phase: str, pct: float, _eta: Optional[float] = None):
"""Thread-safe progress callback — just updates a shared dict."""
"""Thread-safe progress callback — updates dict AND schedules direct WS send."""
nonlocal _last_direct_pct, _last_direct_phase
_progress["phase"] = phase
_progress["pct"] = pct
_progress["seq"] += 1
# Schedule direct WS send via event loop (works from any thread).
# Throttle: only send on phase change or >=2% progress.
if phase != _last_direct_phase or pct - _last_direct_pct >= 0.02:
_last_direct_pct = pct
_last_direct_phase = phase
try:
loop.call_soon_threadsafe(
asyncio.ensure_future,
ws_manager.send_progress(ws, calc_id, phase, pct),
)
except RuntimeError:
pass # Event loop closed
try:
sites_data = data.get("sites", [])
@@ -116,21 +133,27 @@ async def _run_calculation(ws: WebSocket, calc_id: str, data: dict):
if primary_model.name not in models_used:
models_used.insert(0, primary_model.name)
await ws_manager.send_progress(ws, calc_id, "Initializing", 0.05)
await ws_manager.send_progress(ws, calc_id, "Initializing", 0.02)
# ── Progress poller: reads shared dict and sends WS updates ──
# ── Backup progress poller: catches anything call_soon_threadsafe missed ──
async def progress_poller():
last_sent_seq = 0
last_sent_pct = 0.0
last_sent_phase = "Initializing"
while not _done:
await asyncio.sleep(0.3)
await asyncio.sleep(0.5)
seq = _progress["seq"]
pct = _progress["pct"]
phase = _progress["phase"]
if seq != last_sent_seq and (pct - last_sent_pct >= 0.01 or phase != "Calculating coverage"):
# Send on any phase change OR >=3% progress (primary sends handle fine-grained)
if seq != last_sent_seq and (
phase != last_sent_phase
or pct - last_sent_pct >= 0.03
):
await ws_manager.send_progress(ws, calc_id, phase, pct)
last_sent_seq = seq
last_sent_pct = pct
last_sent_phase = phase
poller_task = asyncio.create_task(progress_poller())
@@ -149,6 +172,7 @@ async def _run_calculation(ws: WebSocket, calc_id: str, data: dict):
points = await asyncio.wait_for(
coverage_service.calculate_multi_site_coverage(
sites, settings, cancel_token,
progress_fn=sync_progress_fn,
),
timeout=300.0,
)
@@ -170,7 +194,6 @@ async def _run_calculation(ws: WebSocket, calc_id: str, data: dict):
# Stop poller and send final progress
_done = True
await poller_task
await ws_manager.send_progress(ws, calc_id, "Finalizing", 0.98)
computation_time = time.time() - start_time
@@ -201,7 +224,10 @@ async def _run_calculation(ws: WebSocket, calc_id: str, data: dict):
"models_used": models_used,
}
# Send "Complete" before result so frontend shows 100%
await ws_manager.send_progress(ws, calc_id, "Complete", 1.0)
await ws_manager.send_result(ws, calc_id, result)
logger.info(f"[WS] calc={calc_id} done: {len(points)} pts, {computation_time:.1f}s")
except Exception as e:
logger.error(f"[WS] Calculation error: {e}", exc_info=True)

View File

@@ -485,7 +485,16 @@ class CoverageService:
)
streets = _filter_osm_list_to_bbox(streets, min_lat, min_lon, max_lat, max_lon)
water_bodies = _filter_osm_list_to_bbox(water_bodies, min_lat, min_lon, max_lat, max_lon)
vegetation_areas = _filter_osm_list_to_bbox(vegetation_areas, min_lat, min_lon, max_lat, max_lon)
# Cap vegetation at 5000 — each area requires O(samples × areas)
# point-in-polygon checks per grid point. 20k+ areas with dominant
# path enabled causes OOM via worker memory explosion.
vegetation_areas = _filter_osm_list_to_bbox(
vegetation_areas, min_lat, min_lon, max_lat, max_lon,
max_count=5000,
)
_clog(f"Filtered OSM data: {len(buildings)} bldgs, {len(streets)} streets, "
f"{len(water_bodies)} water, {len(vegetation_areas)} veg")
# Build spatial index for buildings
spatial_idx: Optional[SpatialIndex] = None
@@ -650,10 +659,13 @@ class CoverageService:
sites: List[SiteParams],
settings: CoverageSettings,
cancel_token: Optional[CancellationToken] = None,
progress_fn: Optional[Callable[[str, float], None]] = None,
) -> List[CoveragePoint]:
"""
Calculate combined coverage from multiple sites
Best server (strongest signal) wins at each point
progress_fn(phase, pct): optional callback for progress updates (0.0-1.0).
"""
if not sites:
return []
@@ -661,10 +673,26 @@ class CoverageService:
# Apply preset once
settings = apply_preset(settings)
# Per-site progress tracking for averaged overall progress
num_sites = len(sites)
_site_progress = [0.0] * num_sites
def _make_site_progress(idx: int):
"""Create a progress_fn for one site that reports scaled overall progress."""
def _site_fn(phase: str, pct: float, _eta=None):
_site_progress[idx] = pct
if progress_fn:
overall = sum(_site_progress) / num_sites
progress_fn(f"Site {idx + 1}/{num_sites}: {phase}", overall)
return _site_fn
# Get all individual coverages
all_coverages = await asyncio.gather(*[
self.calculate_coverage(site, settings, cancel_token)
for site in sites
self.calculate_coverage(
site, settings, cancel_token,
progress_fn=_make_site_progress(i) if progress_fn else None,
)
for i, site in enumerate(sites)
])
# Combine by best signal
@@ -751,7 +779,8 @@ class CoverageService:
points = []
timing = {"los": 0.0, "buildings": 0.0, "antenna": 0.0,
"dominant_path": 0.0, "street_canyon": 0.0,
"reflection": 0.0, "vegetation": 0.0}
"reflection": 0.0, "vegetation": 0.0,
"lod_none": 0, "lod_simplified": 0, "lod_full": 0}
total = len(grid)
log_interval = max(1, total // 20)
@@ -901,7 +930,6 @@ class CoverageService:
# LOD_NONE: skip dominant path entirely for distant points (>3km)
if lod == LODLevel.NONE:
timing.setdefault("lod_none", 0)
timing["lod_none"] += 1
else:
t0 = time.time()
@@ -909,12 +937,10 @@ class CoverageService:
# LOD_SIMPLIFIED: limit buildings for mid-range points (1.5-3km)
dp_buildings = nearby_buildings
if lod == LODLevel.SIMPLIFIED:
timing.setdefault("lod_simplified", 0)
timing["lod_simplified"] += 1
if len(nearby_buildings) > SIMPLIFIED_MAX_BUILDINGS:
dp_buildings = nearby_buildings[:SIMPLIFIED_MAX_BUILDINGS]
else:
timing.setdefault("lod_full", 0)
timing["lod_full"] += 1
# nearby_buildings already filtered via spatial index —

View File

@@ -164,11 +164,16 @@ except ImportError:
ray = None # type: ignore
# ── Worker-level spatial index cache (persists across tasks in same worker) ──
# ── Worker-level caches (persist across tasks in same worker process) ──
_worker_spatial_idx = None
_worker_cache_key: Optional[str] = None
# Shared-memory buildings/OSM — unpickled once per worker, cached by key
_worker_shared_buildings = None
_worker_shared_osm_data = None
_worker_shared_data_key: Optional[str] = None
def _ray_process_chunk_impl(chunk, terrain_cache, buildings, osm_data, config):
"""Implementation: process a chunk of (lat, lon, elevation) tuples.
@@ -205,6 +210,7 @@ def _ray_process_chunk_impl(chunk, terrain_cache, buildings, osm_data, config):
"los": 0.0, "buildings": 0.0, "antenna": 0.0,
"dominant_path": 0.0, "street_canyon": 0.0,
"reflection": 0.0, "vegetation": 0.0,
"lod_none": 0, "lod_simplified": 0, "lod_full": 0,
}
precomputed = config.get('precomputed')
@@ -238,9 +244,14 @@ if RAY_AVAILABLE:
def get_cpu_count() -> int:
"""Get number of usable CPU cores, capped at 14."""
"""Get number of usable CPU cores, capped at 6.
Each worker holds its own copy of buildings + OSM data + spatial index
(~200-400 MB per worker). Capping at 6 prevents OOM on systems with
8-16 GB RAM (especially WSL2 with limited memory allocation).
"""
try:
return min(mp.cpu_count() or 4, 14)
return min(mp.cpu_count() or 4, 6)
except Exception:
return 4
@@ -327,8 +338,25 @@ def calculate_coverage_parallel(
except Exception as e:
log_fn(f"Ray execution failed: {e} — falling back to sequential")
# Fallback: ProcessPoolExecutor with reduced workers to avoid MemoryError
pool_workers = min(num_workers, 6)
# Fallback: ProcessPoolExecutor (shared memory eliminates per-chunk pickle)
pool_workers = num_workers
# Scale workers down based on data volume to prevent OOM.
# Each worker unpickles + holds its own copy of buildings, OSM data, and
# spatial index. With large datasets the per-worker memory can exceed
# 300 MB, so reduce workers to keep total under ~2 GB.
data_items = len(buildings) + len(streets) + len(water_bodies) + len(vegetation_areas)
if data_items > 20000:
pool_workers = min(pool_workers, 2)
log_fn(f"Data volume high ({data_items} items) — capping workers at {pool_workers}")
elif data_items > 10000:
pool_workers = min(pool_workers, 3)
log_fn(f"Data volume moderate ({data_items} items) — capping workers at {pool_workers}")
elif data_items > 5000:
pool_workers = min(pool_workers, 4)
log_fn(f"Data volume elevated ({data_items} items) — capping workers at {pool_workers}")
log_fn(f"ProcessPool: {pool_workers} workers (cpu_count={num_workers}, data_items={data_items})")
if pool_workers > 1 and total_points > 100:
try:
return _calculate_with_process_pool(
@@ -338,6 +366,8 @@ def calculate_coverage_parallel(
pool_workers, log_fn, cancel_token, precomputed,
progress_fn,
)
except (MemoryError, OSError) as e:
log_fn(f"ProcessPool OOM/OS error: {e} — falling back to sequential")
except Exception as e:
log_fn(f"ProcessPool failed: {e} — falling back to sequential")
@@ -396,8 +426,8 @@ def _calculate_with_ray(
for lat, lon in grid
]
# ~4 chunks per worker for granular progress
chunk_size = max(1, len(items) // (num_workers * 4))
# Larger chunks to amortize IPC overhead (was num_workers*4)
chunk_size = max(1, min(400, len(items) // max(2, num_workers)))
chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
log_fn(f"Submitting {len(chunks)} chunks of ~{chunk_size} points")
@@ -489,6 +519,7 @@ def _pool_worker_process_chunk(args):
"los": 0.0, "buildings": 0.0, "antenna": 0.0,
"dominant_path": 0.0, "street_canyon": 0.0,
"reflection": 0.0, "vegetation": 0.0,
"lod_none": 0, "lod_simplified": 0, "lod_full": 0,
}
precomputed = config.get('precomputed')
@@ -542,6 +573,28 @@ def _store_terrain_in_shm(terrain_cache: Dict[str, np.ndarray], log_fn) -> Tuple
return blocks, refs
def _store_pickle_in_shm(data, label: str, log_fn) -> Tuple[Optional[Any], Optional[dict]]:
"""Pickle arbitrary data into a SharedMemory block.
Returns (shm_block, ref_dict) where ref_dict = {shm_name, size}.
On failure returns (None, None) and caller should fall back to pickle.
"""
import multiprocessing.shared_memory as shm_mod
import pickle
try:
blob = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
size = len(blob)
block = shm_mod.SharedMemory(create=True, size=size)
block.buf[:size] = blob
mb = size / (1024 * 1024)
log_fn(f"{label} in shared memory: {mb:.1f} MB")
return block, {'shm_name': block.name, 'size': size}
except Exception as e:
log_fn(f"Failed to store {label} in shm: {e}")
return None, None
def _pool_worker_shm_chunk(args):
"""Worker function that reads terrain from shared memory instead of pickle."""
import multiprocessing.shared_memory as shm_mod
@@ -585,6 +638,7 @@ def _pool_worker_shm_chunk(args):
"los": 0.0, "buildings": 0.0, "antenna": 0.0,
"dominant_path": 0.0, "street_canyon": 0.0,
"reflection": 0.0, "vegetation": 0.0,
"lod_none": 0, "lod_simplified": 0, "lod_full": 0,
}
precomputed = config.get('precomputed')
@@ -607,6 +661,200 @@ def _pool_worker_shm_chunk(args):
return results
# Per-worker chunk counter — module global so it persists across tasks
# executed by the same worker process.
_worker_chunk_count: int = 0  # per-worker chunk counter


def _pool_worker_shm_shared(args):
    """Worker: terrain + buildings + OSM all via shared memory.

    Per-chunk args are tiny (~8 KB): just point coords, shm refs, and config.
    Buildings and OSM data are unpickled from shared memory ONCE per worker
    and cached in module globals for subsequent chunks.

    Args (packed as one tuple for executor submission):
        chunk: list of (lat, lon, point_elevation) tuples to process.
        terrain_shm_refs: {tile_name: {'shm_name', 'shape', 'dtype'}} refs to
            terrain tiles stored as raw numpy buffers in shared memory.
        shared_data_refs: {'buildings': ref, 'osm_data': ref} refs to pickled
            blobs written by _store_pickle_in_shm.
        config: calculation config dict; reads 'cache_key', 'site_dict',
            'settings_dict', 'precomputed', 'site_elevation'.

    Returns:
        List of point.model_dump() dicts for points whose rsrp is at or above
        settings.min_signal.
    """
    import multiprocessing.shared_memory as shm_mod
    import pickle

    global _worker_chunk_count
    _worker_chunk_count += 1
    pid = os.getpid()
    t_worker_start = time.perf_counter()

    chunk, terrain_shm_refs, shared_data_refs, config = args

    # ── Reconstruct terrain from shared memory ──
    # Zero-copy: np.ndarray views the shm buffer directly — no unpickling.
    t0 = time.perf_counter()
    terrain_cache = {}
    for tile_name, ref in terrain_shm_refs.items():
        try:
            block = shm_mod.SharedMemory(name=ref['shm_name'])
            terrain_cache[tile_name] = np.ndarray(
                ref['shape'], dtype=ref['dtype'], buffer=block.buf,
            )
        except Exception:
            # Best-effort: a missing/unreadable tile is skipped silently.
            pass
    from app.services.terrain_service import terrain_service
    # NOTE(review): replaces the service's private tile cache wholesale;
    # presumably safe because each worker process has its own module copy —
    # confirm terrain_service exposes no other writers in workers.
    terrain_service._tile_cache = terrain_cache
    t_terrain_shm = time.perf_counter() - t0

    # ── Read buildings + OSM from shared memory (cached per worker) ──
    global _worker_shared_buildings, _worker_shared_osm_data, _worker_shared_data_key
    global _worker_spatial_idx, _worker_cache_key
    data_key = config.get('cache_key', '')
    cached = (_worker_shared_data_key == data_key)
    t_unpickle_bld = 0.0
    t_unpickle_osm = 0.0
    t_spatial = 0.0
    if not cached:
        # First chunk for this calculation — unpickle from shm
        buildings_ref = shared_data_refs.get('buildings')
        osm_ref = shared_data_refs.get('osm_data')
        if buildings_ref:
            try:
                t0 = time.perf_counter()
                blk = shm_mod.SharedMemory(name=buildings_ref['shm_name'])
                _worker_shared_buildings = pickle.loads(bytes(blk.buf[:buildings_ref['size']]))
                t_unpickle_bld = time.perf_counter() - t0
            except Exception:
                # Fall back to empty list so the point loop still runs.
                _worker_shared_buildings = []
        else:
            _worker_shared_buildings = []
        if osm_ref:
            try:
                t0 = time.perf_counter()
                blk = shm_mod.SharedMemory(name=osm_ref['shm_name'])
                _worker_shared_osm_data = pickle.loads(bytes(blk.buf[:osm_ref['size']]))
                t_unpickle_osm = time.perf_counter() - t0
            except Exception:
                _worker_shared_osm_data = {}
        else:
            _worker_shared_osm_data = {}
        _worker_shared_data_key = data_key
        # Rebuild spatial index for new data
        t0 = time.perf_counter()
        if _worker_shared_buildings:
            from app.services.spatial_index import SpatialIndex
            _worker_spatial_idx = SpatialIndex()
            _worker_spatial_idx.build(_worker_shared_buildings)
        else:
            _worker_spatial_idx = None
        _worker_cache_key = data_key
        t_spatial = time.perf_counter() - t0
        # One-time init timing report (stdout, not logger: workers are
        # separate processes without the parent's logging config).
        print(
            f"[WORKER {pid}] Init: terrain_shm={t_terrain_shm*1000:.1f}ms "
            f"unpickle_bld={t_unpickle_bld*1000:.1f}ms "
            f"unpickle_osm={t_unpickle_osm*1000:.1f}ms "
            f"spatial={t_spatial*1000:.1f}ms "
            f"buildings={len(_worker_shared_buildings or [])} "
            f"tiles={len(terrain_cache)}",
            flush=True,
        )

    # Per-chunk trace — printed for every chunk, cached or not.
    print(
        f"[WORKER {pid}] Processing chunk {_worker_chunk_count}, "
        f"cached={cached}, points={len(chunk)}",
        flush=True,
    )

    buildings = _worker_shared_buildings or []
    osm_data = _worker_shared_osm_data or {}

    # ── Imports + object creation (timed) ──
    t0 = time.perf_counter()
    from app.services.coverage_service import CoverageService, SiteParams, CoverageSettings
    t_import = time.perf_counter() - t0

    t0 = time.perf_counter()
    site = SiteParams(**config['site_dict'])
    settings = CoverageSettings(**config['settings_dict'])
    svc = CoverageService()
    t_pydantic = time.perf_counter() - t0

    # Timing accumulator passed into _calculate_point_sync; float keys are
    # seconds, lod_* keys are point counters.
    timing = {
        "los": 0.0, "buildings": 0.0, "antenna": 0.0,
        "dominant_path": 0.0, "street_canyon": 0.0,
        "reflection": 0.0, "vegetation": 0.0,
        "lod_none": 0, "lod_simplified": 0, "lod_full": 0,
    }
    precomputed = config.get('precomputed')
    streets = osm_data.get('streets', [])
    water = osm_data.get('water_bodies', [])
    veg = osm_data.get('vegetation_areas', [])
    site_elev = config['site_elevation']

    t_init_done = time.perf_counter()
    init_ms = (t_init_done - t_worker_start) * 1000

    # ── Process points with per-point profiling (first 3 only) ──
    results = []
    t_loop_start = time.perf_counter()
    t_model_dump_total = 0.0
    n_dumped = 0
    for i, (lat, lon, point_elev) in enumerate(chunk):
        pre = precomputed.get((lat, lon)) if precomputed else None
        # Snapshot timing dict before call (for first 3 points)
        if i < 3:
            timing_before = {k: v for k, v in timing.items()}
            t_pt = time.perf_counter()
        point = svc._calculate_point_sync(
            site, lat, lon, settings,
            buildings, streets,
            _worker_spatial_idx, water, veg,
            site_elev, point_elev, timing,
            precomputed_distance=pre.get('distance') if pre else None,
            precomputed_path_loss=pre.get('path_loss') if pre else None,
        )
        if i < 3:
            t_pt_done = time.perf_counter()
            pt_ms = (t_pt_done - t_pt) * 1000
            # Per-category deltas (ms) accumulated by this single point.
            deltas = {k: (timing[k] - timing_before.get(k, 0)) * 1000 for k in timing}
            parts = " ".join(f"{k}={v:.2f}" for k, v in deltas.items() if v > 0.001)
            print(
                f"[WORKER {pid}] Point {i}: {pt_ms:.2f}ms "
                f"rsrp={point.rsrp:.1f} dist={point.distance:.0f}m "
                f"breakdown=[{parts}]",
                flush=True,
            )
        # Only keep points at/above the signal floor; model_dump() cost is
        # measured separately because pydantic serialization is non-trivial.
        if point.rsrp >= settings.min_signal:
            t_md = time.perf_counter()
            results.append(point.model_dump())
            t_model_dump_total += time.perf_counter() - t_md
            n_dumped += 1

    t_loop_done = time.perf_counter()
    loop_ms = (t_loop_done - t_loop_start) * 1000
    total_ms = (t_loop_done - t_worker_start) * 1000
    avg_pt = loop_ms / len(chunk) if chunk else 0
    avg_dump = (t_model_dump_total * 1000 / n_dumped) if n_dumped else 0
    print(
        f"[WORKER {pid}] Chunk done: total={total_ms:.0f}ms "
        f"init={init_ms:.0f}ms loop={loop_ms:.0f}ms "
        f"avg_pt={avg_pt:.2f}ms model_dump={avg_dump:.2f}ms×{n_dumped} "
        f"import={t_import*1000:.1f}ms pydantic={t_pydantic*1000:.1f}ms "
        f"terrain_shm={t_terrain_shm*1000:.1f}ms "
        f"results={len(results)}/{len(chunk)}",
        flush=True,
    )
    return results
def _calculate_with_process_pool(
grid, point_elevations, site_dict, settings_dict,
terrain_cache, buildings, streets, water_bodies,
@@ -616,23 +864,28 @@ def _calculate_with_process_pool(
):
"""Execute using ProcessPoolExecutor.
Uses shared memory for terrain tiles (zero-copy numpy views) to reduce
memory usage compared to pickling full terrain arrays per worker.
Uses shared memory for terrain tiles (zero-copy numpy views), buildings,
and OSM data (pickle-once, read-many) to eliminate per-chunk serialization
overhead.
"""
from concurrent.futures import ProcessPoolExecutor, as_completed
total_points = len(grid)
# Estimate pickle size for building data and cap workers accordingly
building_count = len(buildings)
if building_count > 10000:
num_workers = min(num_workers, 3)
log_fn(f"Large building set ({building_count}) — reducing workers to {num_workers}")
elif building_count > 5000:
num_workers = min(num_workers, 4)
data_items = building_count + len(streets) + len(water_bodies) + len(vegetation_areas)
log_fn(f"ProcessPool mode: {total_points} points, {num_workers} workers, "
f"{building_count} buildings")
f"{building_count} buildings, {data_items} total OSM items")
# Log memory at start
try:
with open('/proc/self/status') as f:
for line in f:
if line.startswith('VmRSS:'):
log_fn(f"Memory before calculation: {line.strip()}")
break
except Exception:
pass
# Store terrain tiles in shared memory
shm_blocks = []
@@ -652,12 +905,31 @@ def _calculate_with_process_pool(
log_fn(f"Shared memory setup failed ({e}), using pickle fallback")
use_shm = False
# Store buildings + OSM data in shared memory (pickle once, read many)
shared_data_refs = {}
if use_shm:
bld_block, bld_ref = _store_pickle_in_shm(buildings, "Buildings", log_fn)
if bld_block:
shm_blocks.append(bld_block)
shared_data_refs['buildings'] = bld_ref
osm_data_dict = {
'streets': streets,
'water_bodies': water_bodies,
'vegetation_areas': vegetation_areas,
}
osm_block, osm_ref = _store_pickle_in_shm(osm_data_dict, "OSM data", log_fn)
if osm_block:
shm_blocks.append(osm_block)
shared_data_refs['osm_data'] = osm_ref
items = [
(lat, lon, point_elevations.get((lat, lon), 0.0))
for lat, lon in grid
]
chunk_size = max(1, len(items) // (num_workers * 2))
# Target larger chunks to amortize IPC overhead (was num_workers*2)
chunk_size = max(1, min(400, len(items) // max(2, num_workers)))
chunks = [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]
log_fn(f"Submitting {len(chunks)} chunks of ~{chunk_size} points")
@@ -685,8 +957,21 @@ def _calculate_with_process_pool(
pool = ProcessPoolExecutor(max_workers=num_workers, mp_context=ctx)
_set_active_pool(pool)
if use_shm:
# Shared memory path: pass shm refs instead of terrain data
if use_shm and shared_data_refs:
# Full shared memory path: terrain + buildings + OSM all via shm
worker_fn = _pool_worker_shm_shared
futures = {
pool.submit(
worker_fn,
(chunk, terrain_shm_refs, shared_data_refs, config),
): i
for i, chunk in enumerate(chunks)
}
elif use_shm and data_items <= 2000:
# Terrain-only shm — buildings/OSM pickled per chunk.
# Only safe for small datasets; large datasets would OOM from
# pickle copies (num_chunks × pickle_size).
log_fn(f"Terrain-only shm (small data: {data_items} items)")
worker_fn = _pool_worker_shm_chunk
futures = {
pool.submit(
@@ -695,8 +980,9 @@ def _calculate_with_process_pool(
): i
for i, chunk in enumerate(chunks)
}
else:
# Pickle fallback path
elif data_items <= 2000:
# Full pickle fallback — only safe for small datasets
log_fn(f"Full pickle path (small data: {data_items} items)")
futures = {
pool.submit(
_pool_worker_process_chunk,
@@ -704,6 +990,14 @@ def _calculate_with_process_pool(
): i
for i, chunk in enumerate(chunks)
}
else:
# Large dataset + shared memory failed → per-chunk pickle would OOM.
# Bail out; caller will fall back to sequential.
log_fn(f"Shared memory failed for large dataset ({data_items} items) "
f"— skipping ProcessPool to avoid OOM")
raise MemoryError(
f"Cannot safely pickle {data_items} OSM items per chunk"
)
completed_chunks = 0
for future in as_completed(futures):
@@ -730,6 +1024,9 @@ def _calculate_with_process_pool(
if progress_fn:
progress_fn("Calculating coverage", 0.40 + 0.55 * (completed_chunks / len(chunks)))
except MemoryError:
raise # Propagate to caller for sequential fallback
except Exception as e:
log_fn(f"ProcessPool error: {e}")
@@ -748,8 +1045,22 @@ def _calculate_with_process_pool(
block.unlink()
except Exception:
pass
# Release large local references before GC
chunks = None # noqa: F841
items = None # noqa: F841
osm_data = None # noqa: F841
shared_data_refs = None # noqa: F841
# Force garbage collection to release memory from workers
gc.collect()
# Log memory after cleanup
try:
with open('/proc/self/status') as f:
for line in f:
if line.startswith('VmRSS:'):
log_fn(f"Memory after cleanup: {line.strip()}")
break
except Exception:
pass
calc_time = time.time() - t_calc
log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
@@ -758,7 +1069,11 @@ def _calculate_with_process_pool(
timing = {
"parallel_total": calc_time,
"workers": num_workers,
"backend": "process_pool" + ("/shm" if use_shm else "/pickle"),
"backend": "process_pool" + (
"/shm_full" if (use_shm and shared_data_refs)
else "/shm_terrain" if use_shm
else "/pickle"
),
}
return all_results, timing
@@ -791,6 +1106,7 @@ def _calculate_sequential(
"los": 0.0, "buildings": 0.0, "antenna": 0.0,
"dominant_path": 0.0, "street_canyon": 0.0,
"reflection": 0.0, "vegetation": 0.0,
"lod_none": 0, "lod_simplified": 0, "lod_full": 0,
}
t0 = time.time()