diff --git a/RFCP-Iteration-3.2.2-Diagnostic.md b/RFCP-Iteration-3.2.2-Diagnostic.md new file mode 100644 index 0000000..72224ae --- /dev/null +++ b/RFCP-Iteration-3.2.2-Diagnostic.md @@ -0,0 +1,261 @@ +# RFCP - Iteration 3.2.2: Dominant Path Performance Diagnostic + +## Overview + +LOD is working (5 buildings instead of 25) but performance is still ~340ms/point. +This should be ~15x faster but it's almost the same speed. Need to find the bottleneck. + +**Observed:** +``` +[DOMINANT_PATH_VEC] Point #1: buildings=5, walls=50, dist=2946m +338.8ms/point average +``` + +**Expected:** +``` +5 buildings × 50 walls should be ~20-30ms/point, not 340ms +``` + +--- + +## Task 1: Add Detailed Timing to Dominant Path + +**File:** `backend/app/services/dominant_path_service.py` + +Add timing breakdown to understand where time is spent: + +```python +import time +import logging + +logger = logging.getLogger(__name__) + +def find_dominant_path_with_lod( + tx_lat: float, tx_lon: float, tx_height: float, + rx_lat: float, rx_lon: float, rx_height: float, + frequency_mhz: float, + buildings: list, + distance_m: float = None, + spatial_idx = None, # May be passed in +) -> dict: + """Find dominant path with LOD and detailed timing.""" + + timings = {} + t_total_start = time.perf_counter() + + # 1. Distance calculation + t_start = time.perf_counter() + if distance_m is None: + from app.services.terrain_service import TerrainService + distance_m = TerrainService.haversine_distance(tx_lat, tx_lon, rx_lat, rx_lon) + timings['distance_calc'] = (time.perf_counter() - t_start) * 1000 + + # 2. LOD level determination + t_start = time.perf_counter() + lod = get_lod_level(distance_m) + timings['lod_check'] = (time.perf_counter() - t_start) * 1000 + + # 3. Early return for LOD_NONE + if lod == LODLevel.NONE: + timings['total'] = (time.perf_counter() - t_total_start) * 1000 + logger.debug(f"[DP_TIMING] LOD_NONE dist={distance_m:.0f}m total={timings['total']:.2f}ms") + return { + "path_loss_db": 0.0, + "lod_level": "none", + "buildings_checked": 0, + "walls_checked": 0, + "skipped": True, + "timings": timings + } + + # 4. Building filtering for LOD_SIMPLIFIED + t_start = time.perf_counter() + buildings_to_check = buildings + if lod == LODLevel.SIMPLIFIED and buildings: + # This filtering might be slow! + if len(buildings) > SIMPLIFIED_MAX_BUILDINGS: + mid_lat = (tx_lat + rx_lat) / 2 + mid_lon = (tx_lon + rx_lon) / 2 + + buildings_with_dist = [] + for b in buildings: + geom = b.get('geometry', {}) + coords = geom.get('coordinates', [[]])[0] if isinstance(geom, dict) else b.get('geometry', [[]]) + + if coords and len(coords) > 0: + if isinstance(coords[0], (list, tuple)): + blat = sum(c[1] for c in coords) / len(coords) + blon = sum(c[0] for c in coords) / len(coords) + else: + blat = sum(c.get('lat', c.get('y', 0)) for c in coords) / len(coords) + blon = sum(c.get('lon', c.get('x', 0)) for c in coords) / len(coords) + + from app.services.terrain_service import TerrainService + dist = TerrainService.haversine_distance(mid_lat, mid_lon, blat, blon) + buildings_with_dist.append((dist, b)) + + buildings_with_dist.sort(key=lambda x: x[0]) + buildings_to_check = [b for _, b in buildings_with_dist[:SIMPLIFIED_MAX_BUILDINGS]] + timings['building_filter'] = (time.perf_counter() - t_start) * 1000 + + # 5. Wall extraction + t_start = time.perf_counter() + # ... wall extraction code ... + timings['wall_extraction'] = (time.perf_counter() - t_start) * 1000 + + # 6. Geometry calculations (intersections, reflections) + t_start = time.perf_counter() + # ... geometry code ... + timings['geometry_calc'] = (time.perf_counter() - t_start) * 1000 + + # Total + timings['total'] = (time.perf_counter() - t_total_start) * 1000 + + # Log timing breakdown + logger.info( + f"[DP_TIMING] LOD={lod.value} dist={distance_m:.0f}m " + f"bldgs={len(buildings_to_check)} " + f"filter={timings.get('building_filter', 0):.1f}ms " + f"walls={timings.get('wall_extraction', 0):.1f}ms " + f"geom={timings.get('geometry_calc', 0):.1f}ms " + f"total={timings['total']:.1f}ms" + ) + + result["timings"] = timings + return result +``` + +--- + +## Task 2: Check if Building Filtering is the Bottleneck + +The LOD_SIMPLIFIED filtering iterates through ALL 15000 buildings to find 5 nearest. +This is O(n) for every point! + +**Potential fix - use spatial index:** + +```python +# Instead of iterating all buildings: +if spatial_idx is not None: + # Use spatial index to get nearby buildings quickly + nearby = spatial_idx.query_radius(mid_lat, mid_lon, radius=500) # 500m radius + buildings_to_check = nearby[:SIMPLIFIED_MAX_BUILDINGS] +else: + # Fallback to slow method + ... +``` + +--- + +## Task 3: Check Coverage Service Integration + +**File:** `backend/app/services/coverage_service.py` + +Find where dominant_path is called and check: +1. Is spatial_idx being passed? +2. Is building list pre-filtered or full 15000? +3. Are buildings being re-processed for each point? + +Look for patterns like: +```python +# BAD - full list passed to every point +for point in points: + result = find_dominant_path_with_lod(..., buildings=all_buildings) + +# GOOD - pre-filter by distance to point +for point in points: + nearby = spatial_idx.query(point) + result = find_dominant_path_with_lod(..., buildings=nearby) +``` + +--- + +## Task 4: Add Summary Statistics + +At the end of coverage calculation, log timing summary: + +```python +# In coverage_service.py after all points calculated: +if timing_data: + avg_filter = sum(t.get('building_filter', 0) for t in timing_data) / len(timing_data) + avg_geom = sum(t.get('geometry_calc', 0) for t in timing_data) / len(timing_data) + avg_total = sum(t.get('total', 0) for t in timing_data) / len(timing_data) + + logger.info( + f"[DP_SUMMARY] {len(timing_data)} points: " + f"avg_filter={avg_filter:.1f}ms, avg_geom={avg_geom:.1f}ms, " + f"avg_total={avg_total:.1f}ms" + ) +``` + +--- + +## Task 5: Quick Win - Skip Filtering for LOD_NONE + +Make sure LOD_NONE returns IMMEDIATELY without touching buildings list: + +```python +def find_dominant_path_with_lod(...): + # FIRST thing - check LOD + if distance_m is None: + distance_m = calculate_distance(...) + + lod = get_lod_level(distance_m) + + # IMMEDIATE return for LOD_NONE - don't even look at buildings + if lod == LODLevel.NONE: + return {"path_loss_db": 0.0, "skipped": True, "lod_level": "none"} + + # Only now process buildings... +``` + +--- + +## Expected Output + +After implementing, logs should show: + +``` +[DP_TIMING] LOD=none dist=4500m total=0.05ms +[DP_TIMING] LOD=simplified dist=2500m bldgs=5 filter=250.0ms walls=2.0ms geom=5.0ms total=258.0ms +[DP_TIMING] LOD=full dist=800m bldgs=25 filter=0.0ms walls=5.0ms geom=50.0ms total=56.0ms +``` + +This will show us exactly where the 340ms is being spent. + +--- + +## Suspected Root Cause + +**Building filtering is O(15000) for every point!** + +Even with LOD_SIMPLIFIED, we iterate through 15000 buildings to find 5 nearest. +868 points × 15000 buildings = 13 million iterations just for filtering! + +**Fix:** Use spatial index to get nearby buildings in O(log n) instead of O(n). + +--- + +## Testing + +After implementing diagnostics: + +```powershell +cd D:\root\rfcp\installer +.\test-detailed-quick.bat +``` + +Check logs for `[DP_TIMING]` and `[DP_SUMMARY]` lines. + +--- + +## Success Criteria + +1. Logs show timing breakdown for each component +2. Identify which step takes most time (filter vs geometry) +3. If filter is slow → implement spatial index fix +4. If geometry is slow → investigate vectorized calculations + +--- + +*"You can't optimize what you can't measure"*