@mytec: iter2.4.2 start
This commit is contained in:
495
RFCP-Phase-2.4.2-Final-Fixes.md
Normal file
495
RFCP-Phase-2.4.2-Final-Fixes.md
Normal file
@@ -0,0 +1,495 @@
|
||||
# RFCP Phase 2.4.2: Final Critical Fixes
|
||||
|
||||
**Date:** February 1, 2025
|
||||
**Type:** Bug Fixes
|
||||
**Priority:** CRITICAL
|
||||
**Depends on:** Phase 2.4.1
|
||||
|
||||
---
|
||||
|
||||
## 🎯 Summary
|
||||
|
||||
Phase 2.4.1 частково спрацював, але залишились проблеми:
|
||||
- Memory leak — workers не вбиваються (psutil не працює для grandchildren)
|
||||
- Dominant path — все ще timeout (фільтр можливо не застосовується)
|
||||
- Elevation — все зелене (немає локального контрасту)
|
||||
|
||||
---
|
||||
|
||||
## 🔴 Bug 2.4.2a: Memory Leak — Nuclear Kill
|
||||
|
||||
**Problem:**
|
||||
psutil.Process.children() не бачить grandchildren (worker subprocess → python subprocess).
|
||||
Після cleanup все ще 8× rfcp-server.exe, 8GB RAM.
|
||||
|
||||
**File:** `backend/app/services/parallel_coverage_service.py`
|
||||
|
||||
**Current code doesn't work:**
|
||||
```python
|
||||
current = psutil.Process(os.getpid())
|
||||
children = current.children(recursive=True)
|
||||
for child in children:
|
||||
child.terminate()
|
||||
```
|
||||
|
||||
**Fix — kill by process NAME, not by PID tree:**
|
||||
|
||||
```python
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def _kill_worker_processes():
|
||||
"""
|
||||
Nuclear option: kill ALL rfcp-server processes except main.
|
||||
This handles grandchildren that psutil can't see.
|
||||
"""
|
||||
my_pid = os.getpid()
|
||||
killed_count = 0
|
||||
|
||||
if sys.platform == 'win32':
|
||||
# Windows: use tasklist to find all rfcp-server.exe, kill all except self
|
||||
try:
|
||||
# Get list of all rfcp-server PIDs
|
||||
result = subprocess.run(
|
||||
['tasklist', '/FI', 'IMAGENAME eq rfcp-server.exe', '/FO', 'CSV', '/NH'],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if 'rfcp-server.exe' in line:
|
||||
# Parse PID from CSV: "rfcp-server.exe","1234",...
|
||||
parts = line.split(',')
|
||||
if len(parts) >= 2:
|
||||
pid_str = parts[1].strip().strip('"')
|
||||
try:
|
||||
pid = int(pid_str)
|
||||
if pid != my_pid:
|
||||
subprocess.run(
|
||||
['taskkill', '/F', '/PID', str(pid)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
killed_count += 1
|
||||
_clog(f"Killed worker PID {pid}")
|
||||
except (ValueError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
except Exception as e:
|
||||
_clog(f"Kill workers error: {e}")
|
||||
# Fallback: kill ALL rfcp-server except hope main survives
|
||||
try:
|
||||
subprocess.run(
|
||||
['taskkill', '/F', '/IM', 'rfcp-server.exe', '/T'],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
except:
|
||||
pass
|
||||
else:
|
||||
# Unix: use pgrep/pkill
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['pgrep', '-f', 'rfcp-server'],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
for pid_str in result.stdout.strip().split('\n'):
|
||||
if pid_str:
|
||||
try:
|
||||
pid = int(pid_str)
|
||||
if pid != my_pid:
|
||||
os.kill(pid, 9) # SIGKILL
|
||||
killed_count += 1
|
||||
_clog(f"Killed worker PID {pid}")
|
||||
except (ValueError, ProcessLookupError, PermissionError):
|
||||
pass
|
||||
except Exception as e:
|
||||
_clog(f"Kill workers error: {e}")
|
||||
|
||||
return killed_count
|
||||
```
|
||||
|
||||
**Also update ProcessPoolExecutor to use spawn context explicitly:**
|
||||
|
||||
```python
|
||||
import multiprocessing as mp
|
||||
|
||||
def _calculate_with_process_pool(...):
|
||||
# Use spawn to ensure clean worker processes
|
||||
ctx = mp.get_context('spawn')
|
||||
pool = None
|
||||
|
||||
try:
|
||||
pool = ProcessPoolExecutor(
|
||||
max_workers=num_workers,
|
||||
mp_context=ctx
|
||||
)
|
||||
# ... rest of code ...
|
||||
finally:
|
||||
if pool:
|
||||
pool.shutdown(wait=False, cancel_futures=True)
|
||||
|
||||
# Give pool time to cleanup
|
||||
import time
|
||||
time.sleep(0.5)
|
||||
|
||||
# Then force kill any survivors
|
||||
killed = _kill_worker_processes()
|
||||
if killed > 0:
|
||||
_clog(f"Force killed {killed} orphaned workers")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔴 Bug 2.4.2b: Dominant Path — Add Logging + Reduce Limits
|
||||
|
||||
**Problem:**
|
||||
The Detailed preset still times out. It is unknown whether the building filter is actually being applied.
|
||||
|
||||
**File:** `backend/app/services/dominant_path_service.py`
|
||||
|
||||
**Add diagnostic logging to verify filter works:**
|
||||
|
||||
```python
|
||||
# At the TOP of the file, add constants (if not already there):
|
||||
MAX_BUILDINGS_FOR_LINE = 50 # Reduced from 100
|
||||
MAX_BUILDINGS_FOR_REFLECTION = 30 # Reduced from 100
|
||||
MAX_DISTANCE_FROM_PATH = 300 # Reduced from 500m
|
||||
|
||||
def _filter_buildings_by_distance(buildings, tx_point, rx_point, max_count, max_distance):
|
||||
"""Filter buildings to nearest N within max_distance of path."""
|
||||
|
||||
original_count = len(buildings)
|
||||
|
||||
if original_count <= max_count:
|
||||
_log(f"[FILTER] {original_count} buildings, no filter needed")
|
||||
return buildings
|
||||
|
||||
# Calculate midpoint
|
||||
mid_lat = (tx_point[0] + rx_point[0]) / 2
|
||||
mid_lon = (tx_point[1] + rx_point[1]) / 2
|
||||
|
||||
# Calculate squared distance to midpoint (no sqrt for speed)
|
||||
def dist_sq(b):
|
||||
blat = b.get('centroid_lat') or b.get('lat', mid_lat)
|
||||
blon = b.get('centroid_lon') or b.get('lon', mid_lon)
|
||||
dlat = (blat - mid_lat) * 111000
|
||||
dlon = (blon - mid_lon) * 111000 * 0.65 # cos(50°) ≈ 0.65
|
||||
return dlat*dlat + dlon*dlon
|
||||
|
||||
# Sort by distance
|
||||
buildings_sorted = sorted(buildings, key=dist_sq)
|
||||
|
||||
# Filter by max distance
|
||||
max_dist_sq = max_distance * max_distance
|
||||
filtered = [b for b in buildings_sorted if dist_sq(b) <= max_dist_sq]
|
||||
|
||||
# Take top N
|
||||
result = filtered[:max_count]
|
||||
|
||||
_log(f"[FILTER] {original_count} → {len(result)} buildings (max_count={max_count}, max_dist={max_distance}m)")
|
||||
|
||||
return result
|
||||
```
|
||||
|
||||
**Verify the filter is CALLED in the main function:**
|
||||
|
||||
```python
|
||||
def find_dominant_path_sync(tx, rx, buildings, vegetation, spatial_idx, frequency, ...):
|
||||
"""Find dominant propagation path."""
|
||||
|
||||
# Get buildings from spatial index
|
||||
line_buildings_raw = spatial_idx.query_line(tx['lat'], tx['lon'], rx['lat'], rx['lon'])
|
||||
|
||||
# FILTER - this MUST be called
|
||||
line_buildings = _filter_buildings_by_distance(
|
||||
line_buildings_raw,
|
||||
(tx['lat'], tx['lon']),
|
||||
(rx['lat'], rx['lon']),
|
||||
max_count=MAX_BUILDINGS_FOR_LINE,
|
||||
max_distance=MAX_DISTANCE_FROM_PATH
|
||||
)
|
||||
|
||||
# Same for reflection candidates
|
||||
mid_lat = (tx['lat'] + rx['lat']) / 2
|
||||
mid_lon = (tx['lon'] + rx['lon']) / 2
|
||||
refl_buildings_raw = spatial_idx.query_point(mid_lat, mid_lon, buffer_cells=3)
|
||||
|
||||
refl_buildings = _filter_buildings_by_distance(
|
||||
refl_buildings_raw,
|
||||
(tx['lat'], tx['lon']),
|
||||
(rx['lat'], rx['lon']),
|
||||
max_count=MAX_BUILDINGS_FOR_REFLECTION,
|
||||
max_distance=MAX_DISTANCE_FROM_PATH
|
||||
)
|
||||
|
||||
# Update diagnostic log to show FILTERED counts
|
||||
if _point_counter[0] <= 3:
|
||||
print(f"[DOMINANT_PATH] Point #{_point_counter[0]}: "
|
||||
f"line_bldgs={len(line_buildings)} (was {len(line_buildings_raw)}), "
|
||||
f"refl_bldgs={len(refl_buildings)} (was {len(refl_buildings_raw)})")
|
||||
|
||||
# ... rest of function ...
|
||||
```
|
||||
|
||||
**If still too slow — option to DISABLE reflections:**
|
||||
|
||||
```python
|
||||
# Quick fix: skip reflection calculation entirely
|
||||
ENABLE_REFLECTIONS = False # Set to True when performance is fixed
|
||||
|
||||
def find_dominant_path_sync(...):
|
||||
# Direct path
|
||||
direct_loss = calculate_direct_path(...)
|
||||
|
||||
if not ENABLE_REFLECTIONS:
|
||||
return direct_loss
|
||||
|
||||
# Reflection paths (slow)
|
||||
reflection_loss = calculate_reflections(...)
|
||||
|
||||
return min(direct_loss, reflection_loss)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🟡 Bug 2.4.2c: Elevation Layer — Local Min/Max Contrast
|
||||
|
||||
**Problem:**
|
||||
Everything renders green because the coloring uses absolute thresholds (100m, 150m, 200m, ...), while the local terrain only spans roughly 150–200m — so the entire area falls into a single color band.
|
||||
|
||||
**File:** `frontend/src/components/map/ElevationLayer.tsx`
|
||||
|
||||
**Fix — use RELATIVE coloring based on local min/max:**
|
||||
|
||||
```tsx
|
||||
// Color palette (keep these)
|
||||
const COLORS = {
|
||||
DEEP_BLUE: [33, 102, 172], // Lowest
|
||||
LIGHT_BLUE: [103, 169, 207],
|
||||
GREEN: [145, 207, 96],
|
||||
YELLOW: [254, 224, 139],
|
||||
ORANGE: [252, 141, 89],
|
||||
BROWN: [215, 48, 39], // Highest
|
||||
};
|
||||
|
||||
// Interpolate between two colors
|
||||
function interpolateColor(
|
||||
color1: number[],
|
||||
color2: number[],
|
||||
t: number
|
||||
): [number, number, number] {
|
||||
return [
|
||||
Math.round(color1[0] + (color2[0] - color1[0]) * t),
|
||||
Math.round(color1[1] + (color2[1] - color1[1]) * t),
|
||||
Math.round(color1[2] + (color2[2] - color1[2]) * t),
|
||||
];
|
||||
}
|
||||
|
||||
// NEW: Get color based on NORMALIZED elevation (0-1)
|
||||
function getColorForNormalizedElevation(normalized: number): [number, number, number] {
|
||||
// Clamp to 0-1
|
||||
const n = Math.max(0, Math.min(1, normalized));
|
||||
|
||||
if (n < 0.2) {
|
||||
// 0-20%: deep blue → light blue
|
||||
return interpolateColor(COLORS.DEEP_BLUE, COLORS.LIGHT_BLUE, n / 0.2);
|
||||
} else if (n < 0.4) {
|
||||
// 20-40%: light blue → green
|
||||
return interpolateColor(COLORS.LIGHT_BLUE, COLORS.GREEN, (n - 0.2) / 0.2);
|
||||
} else if (n < 0.6) {
|
||||
// 40-60%: green → yellow
|
||||
return interpolateColor(COLORS.GREEN, COLORS.YELLOW, (n - 0.4) / 0.2);
|
||||
} else if (n < 0.8) {
|
||||
// 60-80%: yellow → orange
|
||||
return interpolateColor(COLORS.YELLOW, COLORS.ORANGE, (n - 0.6) / 0.2);
|
||||
} else {
|
||||
// 80-100%: orange → brown
|
||||
return interpolateColor(COLORS.ORANGE, COLORS.BROWN, (n - 0.8) / 0.2);
|
||||
}
|
||||
}
|
||||
|
||||
// In the main render function, USE local min/max:
|
||||
useEffect(() => {
|
||||
// ... fetch elevation data ...
|
||||
|
||||
const data = await response.json();
|
||||
|
||||
// Get LOCAL min/max from the actual data
|
||||
const minElev = data.min_elevation; // e.g., 152
|
||||
const maxElev = data.max_elevation; // e.g., 198
|
||||
const elevRange = maxElev - minElev || 1; // Avoid division by zero
|
||||
|
||||
console.log(`[Elevation] Local range: ${minElev}m - ${maxElev}m (${elevRange}m difference)`);
|
||||
|
||||
// Fill pixel data with NORMALIZED colors
|
||||
for (let i = 0; i < data.rows; i++) {
|
||||
for (let j = 0; j < data.cols; j++) {
|
||||
const elevation = data.elevations[i][j];
|
||||
|
||||
// Normalize to 0-1 based on LOCAL range
|
||||
const normalized = (elevation - minElev) / elevRange;
|
||||
|
||||
const color = getColorForNormalizedElevation(normalized);
|
||||
|
||||
const idx = (i * data.cols + j) * 4;
|
||||
imageData.data[idx] = color[0]; // R
|
||||
imageData.data[idx + 1] = color[1]; // G
|
||||
imageData.data[idx + 2] = color[2]; // B
|
||||
imageData.data[idx + 3] = 255; // A (full opacity, layer opacity handled separately)
|
||||
}
|
||||
}
|
||||
|
||||
// ... rest of canvas/overlay code ...
|
||||
}, [enabled, opacity, bbox, map]);
|
||||
```
|
||||
|
||||
**Also add elevation legend showing LOCAL range:**
|
||||
|
||||
```tsx
|
||||
// In the parent component (App.tsx or Map.tsx), show legend:
|
||||
{showElevation && elevationRange && (
|
||||
<div className="elevation-legend">
|
||||
<div className="legend-title">Elevation</div>
|
||||
<div className="legend-gradient"></div>
|
||||
<div className="legend-labels">
|
||||
<span>{elevationRange.min}m</span>
|
||||
<span>{elevationRange.max}m</span>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
// CSS for gradient:
|
||||
.legend-gradient {
|
||||
height: 10px;
|
||||
background: linear-gradient(to right,
|
||||
#2166ac, /* deep blue - low */
|
||||
#67a9cf, /* light blue */
|
||||
#91cf60, /* green */
|
||||
#fee08b, /* yellow */
|
||||
#fc8d59, /* orange */
|
||||
#d73027 /* brown - high */
|
||||
);
|
||||
border-radius: 2px;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🟢 Enhancement 2.4.2d: GPU Install Message
|
||||
|
||||
**File:** `backend/app/services/gpu_service.py`
|
||||
|
||||
**Add clear install instructions on startup:**
|
||||
|
||||
```python
|
||||
# At module init:
|
||||
|
||||
GPU_AVAILABLE = False
|
||||
GPU_INFO = None
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
|
||||
# Check CUDA
|
||||
device_count = cp.cuda.runtime.getDeviceCount()
|
||||
if device_count > 0:
|
||||
GPU_AVAILABLE = True
|
||||
props = cp.cuda.runtime.getDeviceProperties(0)
|
||||
GPU_INFO = {
|
||||
'name': props['name'].decode() if isinstance(props['name'], bytes) else str(props['name']),
|
||||
'memory_mb': props['totalGlobalMem'] // (1024 * 1024),
|
||||
'cuda_version': cp.cuda.runtime.runtimeGetVersion(),
|
||||
}
|
||||
print(f"[GPU] ✓ CUDA available: {GPU_INFO['name']} ({GPU_INFO['memory_mb']} MB)")
|
||||
else:
|
||||
print("[GPU] ✗ No CUDA devices found")
|
||||
|
||||
except ImportError:
|
||||
print("[GPU] ✗ CuPy not installed — using CPU/NumPy")
|
||||
print("[GPU] To enable GPU acceleration, install CuPy:")
|
||||
print("[GPU] ")
|
||||
print("[GPU] For CUDA 12.x: pip install cupy-cuda12x")
|
||||
print("[GPU] For CUDA 11.x: pip install cupy-cuda11x")
|
||||
print("[GPU] ")
|
||||
print("[GPU] Check CUDA version: nvidia-smi")
|
||||
|
||||
except Exception as e:
|
||||
print(f"[GPU] ✗ CuPy error: {e}")
|
||||
print("[GPU] GPU acceleration disabled")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📁 Files to Modify
|
||||
|
||||
| File | Changes |
|
||||
|------|---------|
|
||||
| `backend/app/services/parallel_coverage_service.py` | Rewrite `_kill_worker_processes()` to kill by name; add spawn context |
|
||||
| `backend/app/services/dominant_path_service.py` | Add detailed filter logging; reduce limits to 50/30; add ENABLE_REFLECTIONS flag |
|
||||
| `frontend/src/components/map/ElevationLayer.tsx` | Use local min/max for color normalization |
|
||||
| `backend/app/services/gpu_service.py` | Add clear install instructions |
|
||||
|
||||
---
|
||||
|
||||
## 🧪 Testing
|
||||
|
||||
### Test 1: Memory Cleanup
|
||||
```bash
|
||||
# Run Detailed preset (will timeout)
|
||||
# Watch Task Manager during and after
|
||||
# After timeout message:
|
||||
# - Should see only 1 rfcp-server.exe
|
||||
# - RAM should drop from 8GB to <500MB
|
||||
```
|
||||
|
||||
### Test 2: Dominant Path Logging
|
||||
```bash
|
||||
# Run debug mode, watch console
|
||||
# Should see:
|
||||
# [FILTER] 646 → 50 buildings (max_count=50, max_dist=300m)
|
||||
# [DOMINANT_PATH] Point #1: line_bldgs=50 (was 646), refl_bldgs=30 (was 302)
|
||||
```
|
||||
|
||||
### Test 3: Elevation Contrast
|
||||
```bash
|
||||
# Open app
|
||||
# Enable elevation layer
|
||||
# Should see color variation:
|
||||
# - Blue in valleys
|
||||
# - Green/yellow on slopes
|
||||
# - Brown/orange on hills
|
||||
# Console should show: "[Elevation] Local range: 152m - 198m"
|
||||
```
|
||||
|
||||
### Test 4: GPU Message
|
||||
```bash
|
||||
# Start server, check console
|
||||
# Should see clear message about CuPy install
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## ✅ Success Criteria
|
||||
|
||||
- [ ] After timeout: only 1 rfcp-server.exe, RAM < 500MB
|
||||
- [ ] Dominant path logs show filtered counts (50 buildings, not 600)
|
||||
- [ ] Detailed preset completes in <120s OR logs explain why still slow
|
||||
- [ ] Elevation layer shows visible terrain contrast
|
||||
- [ ] GPU install instructions visible in console
|
||||
|
||||
---
|
||||
|
||||
## 📈 Expected Results After Fixes
|
||||
|
||||
| Metric | Before | After |
|
||||
|--------|--------|-------|
|
||||
| Workers after timeout | 8 (7.8GB) | 1 (<500MB) |
|
||||
| Buildings per point | 600+ | 50 |
|
||||
| Detailed time | 300s timeout | ~60-90s |
|
||||
| Elevation | All green | Color gradient |
|
||||
|
||||
---
|
||||
|
||||
## 🔜 Next Phase
|
||||
|
||||
After 2.4.2:
|
||||
- Phase 2.5: Loading screen with fun facts
|
||||
- Phase 2.5: Better error messages in UI
|
||||
- Phase 2.6: Export coverage to GeoJSON/KML
|
||||
Reference in New Issue
Block a user