@mytec: iter2.4.2 start
This commit is contained in:
@@ -22,7 +22,8 @@
|
|||||||
"Write(*)",
|
"Write(*)",
|
||||||
"Bash(python3:*)",
|
"Bash(python3:*)",
|
||||||
"Bash(source:*)",
|
"Bash(source:*)",
|
||||||
"Bash(/mnt/d/root/rfcp/venv/bin/python3:*)"
|
"Bash(/mnt/d/root/rfcp/venv/bin/python3:*)",
|
||||||
|
"Bash(node --check:*)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
495
RFCP-Phase-2.4.2-Final-Fixes.md
Normal file
495
RFCP-Phase-2.4.2-Final-Fixes.md
Normal file
@@ -0,0 +1,495 @@
|
|||||||
|
# RFCP Phase 2.4.2: Final Critical Fixes
|
||||||
|
|
||||||
|
**Date:** February 1, 2025
|
||||||
|
**Type:** Bug Fixes
|
||||||
|
**Priority:** CRITICAL
|
||||||
|
**Depends on:** Phase 2.4.1
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎯 Summary
|
||||||
|
|
||||||
|
Phase 2.4.1 частково спрацював, але залишились проблеми:
|
||||||
|
- Memory leak — workers не вбиваються (psutil не працює для grandchildren)
|
||||||
|
- Dominant path — все ще timeout (фільтр можливо не застосовується)
|
||||||
|
- Elevation — все зелене (немає локального контрасту)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔴 Bug 2.4.2a: Memory Leak — Nuclear Kill
|
||||||
|
|
||||||
|
**Problem:**
|
||||||
|
psutil.Process.children() не бачить grandchildren (worker subprocess → python subprocess).
|
||||||
|
Після cleanup все ще 8× rfcp-server.exe, 8GB RAM.
|
||||||
|
|
||||||
|
**File:** `backend/app/services/parallel_coverage_service.py`
|
||||||
|
|
||||||
|
**Current code doesn't work:**
|
||||||
|
```python
|
||||||
|
current = psutil.Process(os.getpid())
|
||||||
|
children = current.children(recursive=True)
|
||||||
|
for child in children:
|
||||||
|
child.terminate()
|
||||||
|
```
|
||||||
|
|
||||||
|
**Fix — kill by process NAME, not by PID tree:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def _kill_worker_processes():
|
||||||
|
"""
|
||||||
|
Nuclear option: kill ALL rfcp-server processes except main.
|
||||||
|
This handles grandchildren that psutil can't see.
|
||||||
|
"""
|
||||||
|
my_pid = os.getpid()
|
||||||
|
killed_count = 0
|
||||||
|
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
# Windows: use tasklist to find all rfcp-server.exe, kill all except self
|
||||||
|
try:
|
||||||
|
# Get list of all rfcp-server PIDs
|
||||||
|
result = subprocess.run(
|
||||||
|
['tasklist', '/FI', 'IMAGENAME eq rfcp-server.exe', '/FO', 'CSV', '/NH'],
|
||||||
|
capture_output=True, text=True, timeout=5
|
||||||
|
)
|
||||||
|
|
||||||
|
for line in result.stdout.strip().split('\n'):
|
||||||
|
if 'rfcp-server.exe' in line:
|
||||||
|
# Parse PID from CSV: "rfcp-server.exe","1234",...
|
||||||
|
parts = line.split(',')
|
||||||
|
if len(parts) >= 2:
|
||||||
|
pid_str = parts[1].strip().strip('"')
|
||||||
|
try:
|
||||||
|
pid = int(pid_str)
|
||||||
|
if pid != my_pid:
|
||||||
|
subprocess.run(
|
||||||
|
['taskkill', '/F', '/PID', str(pid)],
|
||||||
|
capture_output=True, timeout=5
|
||||||
|
)
|
||||||
|
killed_count += 1
|
||||||
|
_clog(f"Killed worker PID {pid}")
|
||||||
|
except (ValueError, subprocess.TimeoutExpired):
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
_clog(f"Kill workers error: {e}")
|
||||||
|
# Fallback: kill ALL rfcp-server.exe processes by image name (NOTE: this may also kill the main process)
|
||||||
|
try:
|
||||||
|
subprocess.run(
|
||||||
|
['taskkill', '/F', '/IM', 'rfcp-server.exe', '/T'],
|
||||||
|
capture_output=True, timeout=5
|
||||||
|
)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
# Unix: use pgrep/pkill
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
['pgrep', '-f', 'rfcp-server'],
|
||||||
|
capture_output=True, text=True, timeout=5
|
||||||
|
)
|
||||||
|
for pid_str in result.stdout.strip().split('\n'):
|
||||||
|
if pid_str:
|
||||||
|
try:
|
||||||
|
pid = int(pid_str)
|
||||||
|
if pid != my_pid:
|
||||||
|
os.kill(pid, 9) # SIGKILL
|
||||||
|
killed_count += 1
|
||||||
|
_clog(f"Killed worker PID {pid}")
|
||||||
|
except (ValueError, ProcessLookupError, PermissionError):
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
_clog(f"Kill workers error: {e}")
|
||||||
|
|
||||||
|
return killed_count
|
||||||
|
```
|
||||||
|
|
||||||
|
**Also update ProcessPoolExecutor to use spawn context explicitly:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import multiprocessing as mp
|
||||||
|
|
||||||
|
def _calculate_with_process_pool(...):
|
||||||
|
# Use spawn to ensure clean worker processes
|
||||||
|
ctx = mp.get_context('spawn')
|
||||||
|
pool = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
pool = ProcessPoolExecutor(
|
||||||
|
max_workers=num_workers,
|
||||||
|
mp_context=ctx
|
||||||
|
)
|
||||||
|
# ... rest of code ...
|
||||||
|
finally:
|
||||||
|
if pool:
|
||||||
|
pool.shutdown(wait=False, cancel_futures=True)
|
||||||
|
|
||||||
|
# Give pool time to cleanup
|
||||||
|
import time
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
|
# Then force kill any survivors
|
||||||
|
killed = _kill_worker_processes()
|
||||||
|
if killed > 0:
|
||||||
|
_clog(f"Force killed {killed} orphaned workers")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔴 Bug 2.4.2b: Dominant Path — Add Logging + Reduce Limits
|
||||||
|
|
||||||
|
**Problem:**
|
||||||
|
The Detailed preset still times out. It is unknown whether the building filter is being applied.
|
||||||
|
|
||||||
|
**File:** `backend/app/services/dominant_path_service.py`
|
||||||
|
|
||||||
|
**Add diagnostic logging to verify filter works:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# At the TOP of the file, add constants (if not already there):
|
||||||
|
MAX_BUILDINGS_FOR_LINE = 50 # Reduced from 100
|
||||||
|
MAX_BUILDINGS_FOR_REFLECTION = 30 # Reduced from 100
|
||||||
|
MAX_DISTANCE_FROM_PATH = 300 # Reduced from 500m
|
||||||
|
|
||||||
|
def _filter_buildings_by_distance(buildings, tx_point, rx_point, max_count, max_distance):
|
||||||
|
"""Filter buildings to nearest N within max_distance of path."""
|
||||||
|
|
||||||
|
original_count = len(buildings)
|
||||||
|
|
||||||
|
if original_count <= max_count:
|
||||||
|
_log(f"[FILTER] {original_count} buildings, no filter needed")
|
||||||
|
return buildings
|
||||||
|
|
||||||
|
# Calculate midpoint
|
||||||
|
mid_lat = (tx_point[0] + rx_point[0]) / 2
|
||||||
|
mid_lon = (tx_point[1] + rx_point[1]) / 2
|
||||||
|
|
||||||
|
# Calculate squared distance to midpoint (no sqrt for speed)
|
||||||
|
def dist_sq(b):
|
||||||
|
blat = b.get('centroid_lat') or b.get('lat', mid_lat)
|
||||||
|
blon = b.get('centroid_lon') or b.get('lon', mid_lon)
|
||||||
|
dlat = (blat - mid_lat) * 111000
|
||||||
|
dlon = (blon - mid_lon) * 111000 * 0.65 # cos(50°) ≈ 0.65
|
||||||
|
return dlat*dlat + dlon*dlon
|
||||||
|
|
||||||
|
# Sort by distance
|
||||||
|
buildings_sorted = sorted(buildings, key=dist_sq)
|
||||||
|
|
||||||
|
# Filter by max distance
|
||||||
|
max_dist_sq = max_distance * max_distance
|
||||||
|
filtered = [b for b in buildings_sorted if dist_sq(b) <= max_dist_sq]
|
||||||
|
|
||||||
|
# Take top N
|
||||||
|
result = filtered[:max_count]
|
||||||
|
|
||||||
|
_log(f"[FILTER] {original_count} → {len(result)} buildings (max_count={max_count}, max_dist={max_distance}m)")
|
||||||
|
|
||||||
|
return result
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify the filter is CALLED in the main function:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
def find_dominant_path_sync(tx, rx, buildings, vegetation, spatial_idx, frequency, ...):
|
||||||
|
"""Find dominant propagation path."""
|
||||||
|
|
||||||
|
# Get buildings from spatial index
|
||||||
|
line_buildings_raw = spatial_idx.query_line(tx['lat'], tx['lon'], rx['lat'], rx['lon'])
|
||||||
|
|
||||||
|
# FILTER - this MUST be called
|
||||||
|
line_buildings = _filter_buildings_by_distance(
|
||||||
|
line_buildings_raw,
|
||||||
|
(tx['lat'], tx['lon']),
|
||||||
|
(rx['lat'], rx['lon']),
|
||||||
|
max_count=MAX_BUILDINGS_FOR_LINE,
|
||||||
|
max_distance=MAX_DISTANCE_FROM_PATH
|
||||||
|
)
|
||||||
|
|
||||||
|
# Same for reflection candidates
|
||||||
|
mid_lat = (tx['lat'] + rx['lat']) / 2
|
||||||
|
mid_lon = (tx['lon'] + rx['lon']) / 2
|
||||||
|
refl_buildings_raw = spatial_idx.query_point(mid_lat, mid_lon, buffer_cells=3)
|
||||||
|
|
||||||
|
refl_buildings = _filter_buildings_by_distance(
|
||||||
|
refl_buildings_raw,
|
||||||
|
(tx['lat'], tx['lon']),
|
||||||
|
(rx['lat'], rx['lon']),
|
||||||
|
max_count=MAX_BUILDINGS_FOR_REFLECTION,
|
||||||
|
max_distance=MAX_DISTANCE_FROM_PATH
|
||||||
|
)
|
||||||
|
|
||||||
|
# Update diagnostic log to show FILTERED counts
|
||||||
|
if _point_counter[0] <= 3:
|
||||||
|
print(f"[DOMINANT_PATH] Point #{_point_counter[0]}: "
|
||||||
|
f"line_bldgs={len(line_buildings)} (was {len(line_buildings_raw)}), "
|
||||||
|
f"refl_bldgs={len(refl_buildings)} (was {len(refl_buildings_raw)})")
|
||||||
|
|
||||||
|
# ... rest of function ...
|
||||||
|
```
|
||||||
|
|
||||||
|
**If still too slow — option to DISABLE reflections:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Quick fix: skip reflection calculation entirely
|
||||||
|
ENABLE_REFLECTIONS = False # Set to True when performance is fixed
|
||||||
|
|
||||||
|
def find_dominant_path_sync(...):
|
||||||
|
# Direct path
|
||||||
|
direct_loss = calculate_direct_path(...)
|
||||||
|
|
||||||
|
if not ENABLE_REFLECTIONS:
|
||||||
|
return direct_loss
|
||||||
|
|
||||||
|
# Reflection paths (slow)
|
||||||
|
reflection_loss = calculate_reflections(...)
|
||||||
|
|
||||||
|
return min(direct_loss, reflection_loss)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🟡 Bug 2.4.2c: Elevation Layer — Local Min/Max Contrast
|
||||||
|
|
||||||
|
**Problem:**
|
||||||
|
Everything renders green because the color scale uses absolute thresholds (100m, 150m, 200m...), while the local terrain only spans roughly 150-200m.
|
||||||
|
|
||||||
|
**File:** `frontend/src/components/map/ElevationLayer.tsx`
|
||||||
|
|
||||||
|
**Fix — use RELATIVE coloring based on local min/max:**
|
||||||
|
|
||||||
|
```tsx
|
||||||
|
// Color palette (keep these)
|
||||||
|
const COLORS = {
|
||||||
|
DEEP_BLUE: [33, 102, 172], // Lowest
|
||||||
|
LIGHT_BLUE: [103, 169, 207],
|
||||||
|
GREEN: [145, 207, 96],
|
||||||
|
YELLOW: [254, 224, 139],
|
||||||
|
ORANGE: [252, 141, 89],
|
||||||
|
BROWN: [215, 48, 39], // Highest
|
||||||
|
};
|
||||||
|
|
||||||
|
// Interpolate between two colors
|
||||||
|
function interpolateColor(
|
||||||
|
color1: number[],
|
||||||
|
color2: number[],
|
||||||
|
t: number
|
||||||
|
): [number, number, number] {
|
||||||
|
return [
|
||||||
|
Math.round(color1[0] + (color2[0] - color1[0]) * t),
|
||||||
|
Math.round(color1[1] + (color2[1] - color1[1]) * t),
|
||||||
|
Math.round(color1[2] + (color2[2] - color1[2]) * t),
|
||||||
|
];
|
||||||
|
}
|
||||||
|
|
||||||
|
// NEW: Get color based on NORMALIZED elevation (0-1)
|
||||||
|
function getColorForNormalizedElevation(normalized: number): [number, number, number] {
|
||||||
|
// Clamp to 0-1
|
||||||
|
const n = Math.max(0, Math.min(1, normalized));
|
||||||
|
|
||||||
|
if (n < 0.2) {
|
||||||
|
// 0-20%: deep blue → light blue
|
||||||
|
return interpolateColor(COLORS.DEEP_BLUE, COLORS.LIGHT_BLUE, n / 0.2);
|
||||||
|
} else if (n < 0.4) {
|
||||||
|
// 20-40%: light blue → green
|
||||||
|
return interpolateColor(COLORS.LIGHT_BLUE, COLORS.GREEN, (n - 0.2) / 0.2);
|
||||||
|
} else if (n < 0.6) {
|
||||||
|
// 40-60%: green → yellow
|
||||||
|
return interpolateColor(COLORS.GREEN, COLORS.YELLOW, (n - 0.4) / 0.2);
|
||||||
|
} else if (n < 0.8) {
|
||||||
|
// 60-80%: yellow → orange
|
||||||
|
return interpolateColor(COLORS.YELLOW, COLORS.ORANGE, (n - 0.6) / 0.2);
|
||||||
|
} else {
|
||||||
|
// 80-100%: orange → brown
|
||||||
|
return interpolateColor(COLORS.ORANGE, COLORS.BROWN, (n - 0.8) / 0.2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// In the main render function, USE local min/max:
|
||||||
|
useEffect(() => {
|
||||||
|
// ... fetch elevation data ...
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
|
||||||
|
// Get LOCAL min/max from the actual data
|
||||||
|
const minElev = data.min_elevation; // e.g., 152
|
||||||
|
const maxElev = data.max_elevation; // e.g., 198
|
||||||
|
const elevRange = maxElev - minElev || 1; // Avoid division by zero
|
||||||
|
|
||||||
|
console.log(`[Elevation] Local range: ${minElev}m - ${maxElev}m (${elevRange}m difference)`);
|
||||||
|
|
||||||
|
// Fill pixel data with NORMALIZED colors
|
||||||
|
for (let i = 0; i < data.rows; i++) {
|
||||||
|
for (let j = 0; j < data.cols; j++) {
|
||||||
|
const elevation = data.elevations[i][j];
|
||||||
|
|
||||||
|
// Normalize to 0-1 based on LOCAL range
|
||||||
|
const normalized = (elevation - minElev) / elevRange;
|
||||||
|
|
||||||
|
const color = getColorForNormalizedElevation(normalized);
|
||||||
|
|
||||||
|
const idx = (i * data.cols + j) * 4;
|
||||||
|
imageData.data[idx] = color[0]; // R
|
||||||
|
imageData.data[idx + 1] = color[1]; // G
|
||||||
|
imageData.data[idx + 2] = color[2]; // B
|
||||||
|
imageData.data[idx + 3] = 255; // A (full opacity, layer opacity handled separately)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ... rest of canvas/overlay code ...
|
||||||
|
}, [enabled, opacity, bbox, map]);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Also add elevation legend showing LOCAL range:**
|
||||||
|
|
||||||
|
```tsx
|
||||||
|
// In the parent component (App.tsx or Map.tsx), show legend:
|
||||||
|
{showElevation && elevationRange && (
|
||||||
|
<div className="elevation-legend">
|
||||||
|
<div className="legend-title">Elevation</div>
|
||||||
|
<div className="legend-gradient"></div>
|
||||||
|
<div className="legend-labels">
|
||||||
|
<span>{elevationRange.min}m</span>
|
||||||
|
<span>{elevationRange.max}m</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
|
// CSS for gradient:
|
||||||
|
.legend-gradient {
|
||||||
|
height: 10px;
|
||||||
|
background: linear-gradient(to right,
|
||||||
|
#2166ac, /* deep blue - low */
|
||||||
|
#67a9cf, /* light blue */
|
||||||
|
#91cf60, /* green */
|
||||||
|
#fee08b, /* yellow */
|
||||||
|
#fc8d59, /* orange */
|
||||||
|
#d73027 /* brown - high */
|
||||||
|
);
|
||||||
|
border-radius: 2px;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🟢 Enhancement 2.4.2d: GPU Install Message
|
||||||
|
|
||||||
|
**File:** `backend/app/services/gpu_service.py`
|
||||||
|
|
||||||
|
**Add clear install instructions on startup:**
|
||||||
|
|
||||||
|
```python
|
||||||
|
# At module init:
|
||||||
|
|
||||||
|
GPU_AVAILABLE = False
|
||||||
|
GPU_INFO = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
import cupy as cp
|
||||||
|
|
||||||
|
# Check CUDA
|
||||||
|
device_count = cp.cuda.runtime.getDeviceCount()
|
||||||
|
if device_count > 0:
|
||||||
|
GPU_AVAILABLE = True
|
||||||
|
props = cp.cuda.runtime.getDeviceProperties(0)
|
||||||
|
GPU_INFO = {
|
||||||
|
'name': props['name'].decode() if isinstance(props['name'], bytes) else str(props['name']),
|
||||||
|
'memory_mb': props['totalGlobalMem'] // (1024 * 1024),
|
||||||
|
'cuda_version': cp.cuda.runtime.runtimeGetVersion(),
|
||||||
|
}
|
||||||
|
print(f"[GPU] ✓ CUDA available: {GPU_INFO['name']} ({GPU_INFO['memory_mb']} MB)")
|
||||||
|
else:
|
||||||
|
print("[GPU] ✗ No CUDA devices found")
|
||||||
|
|
||||||
|
except ImportError:
|
||||||
|
print("[GPU] ✗ CuPy not installed — using CPU/NumPy")
|
||||||
|
print("[GPU] To enable GPU acceleration, install CuPy:")
|
||||||
|
print("[GPU] ")
|
||||||
|
print("[GPU] For CUDA 12.x: pip install cupy-cuda12x")
|
||||||
|
print("[GPU] For CUDA 11.x: pip install cupy-cuda11x")
|
||||||
|
print("[GPU] ")
|
||||||
|
print("[GPU] Check CUDA version: nvidia-smi")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[GPU] ✗ CuPy error: {e}")
|
||||||
|
print("[GPU] GPU acceleration disabled")
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📁 Files to Modify
|
||||||
|
|
||||||
|
| File | Changes |
|
||||||
|
|------|---------|
|
||||||
|
| `backend/app/services/parallel_coverage_service.py` | Rewrite `_kill_worker_processes()` to kill by name; add spawn context |
|
||||||
|
| `backend/app/services/dominant_path_service.py` | Add detailed filter logging; reduce limits to 50/30; add ENABLE_REFLECTIONS flag |
|
||||||
|
| `frontend/src/components/map/ElevationLayer.tsx` | Use local min/max for color normalization |
|
||||||
|
| `backend/app/services/gpu_service.py` | Add clear install instructions |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧪 Testing
|
||||||
|
|
||||||
|
### Test 1: Memory Cleanup
|
||||||
|
```bash
|
||||||
|
# Run Detailed preset (will timeout)
|
||||||
|
# Watch Task Manager during and after
|
||||||
|
# After timeout message:
|
||||||
|
# - Should see only 1 rfcp-server.exe
|
||||||
|
# - RAM should drop from 8GB to <500MB
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test 2: Dominant Path Logging
|
||||||
|
```bash
|
||||||
|
# Run debug mode, watch console
|
||||||
|
# Should see:
|
||||||
|
# [FILTER] 646 → 50 buildings (max_count=50, max_dist=300m)
|
||||||
|
# [DOMINANT_PATH] Point #1: line_bldgs=50 (was 646), refl_bldgs=30 (was 302)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test 3: Elevation Contrast
|
||||||
|
```bash
|
||||||
|
# Open app
|
||||||
|
# Enable elevation layer
|
||||||
|
# Should see color variation:
|
||||||
|
# - Blue in valleys
|
||||||
|
# - Green/yellow on slopes
|
||||||
|
# - Brown/orange on hills
|
||||||
|
# Console should show: "[Elevation] Local range: 152m - 198m (46m difference)"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test 4: GPU Message
|
||||||
|
```bash
|
||||||
|
# Start server, check console
|
||||||
|
# Should see clear message about CuPy install
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✅ Success Criteria
|
||||||
|
|
||||||
|
- [ ] After timeout: only 1 rfcp-server.exe, RAM < 500MB
|
||||||
|
- [ ] Dominant path logs show filtered counts (50 buildings, not 600)
|
||||||
|
- [ ] Detailed preset completes in <120s, OR the logs explain why it is still slow
|
||||||
|
- [ ] Elevation layer shows visible terrain contrast
|
||||||
|
- [ ] GPU install instructions visible in console
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📈 Expected Results After Fixes
|
||||||
|
|
||||||
|
| Metric | Before | After |
|
||||||
|
|--------|--------|-------|
|
||||||
|
| Workers after timeout | 8 (7.8GB) | 1 (<500MB) |
|
||||||
|
| Buildings per point | 600+ | 50 |
|
||||||
|
| Detailed time | 300s timeout | ~60-90s |
|
||||||
|
| Elevation | All green | Color gradient |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔜 Next Phase
|
||||||
|
|
||||||
|
After 2.4.2:
|
||||||
|
- Phase 2.5: Loading screen with fun facts
|
||||||
|
- Phase 2.5: Better error messages in UI
|
||||||
|
- Phase 2.6: Export coverage to GeoJSON/KML
|
||||||
@@ -84,9 +84,15 @@ async def calculate_coverage(request: CoverageRequest) -> CoverageResponse:
|
|||||||
)
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
cancel_token.cancel()
|
cancel_token.cancel()
|
||||||
raise HTTPException(408, "Calculation timeout (5 min) — try smaller radius or lower resolution")
|
# Force cleanup orphaned worker processes
|
||||||
|
from app.services.parallel_coverage_service import _kill_worker_processes
|
||||||
|
killed = _kill_worker_processes()
|
||||||
|
detail = f"Calculation timeout (5 min). Cleaned up {killed} workers." if killed else "Calculation timeout (5 min) — try smaller radius or lower resolution"
|
||||||
|
raise HTTPException(408, detail)
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
cancel_token.cancel()
|
cancel_token.cancel()
|
||||||
|
from app.services.parallel_coverage_service import _kill_worker_processes
|
||||||
|
_kill_worker_processes()
|
||||||
raise HTTPException(499, "Client disconnected")
|
raise HTTPException(499, "Client disconnected")
|
||||||
|
|
||||||
computation_time = time.time() - start_time
|
computation_time = time.time() - start_time
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
import asyncio
|
||||||
import multiprocessing as mp
|
import multiprocessing as mp
|
||||||
from fastapi import APIRouter
|
from fastapi import APIRouter
|
||||||
|
|
||||||
@@ -42,3 +44,17 @@ async def get_system_info():
|
|||||||
"gpu": gpu_info,
|
"gpu": gpu_info,
|
||||||
"gpu_available": gpu_info.get("available", False),
|
"gpu_available": gpu_info.get("available", False),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/shutdown")
|
||||||
|
async def shutdown():
|
||||||
|
"""Graceful shutdown endpoint. Kills worker processes and exits."""
|
||||||
|
from app.services.parallel_coverage_service import _kill_worker_processes
|
||||||
|
|
||||||
|
killed = _kill_worker_processes()
|
||||||
|
|
||||||
|
# Schedule hard exit after response is sent
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
loop.call_later(0.5, lambda: os._exit(0))
|
||||||
|
|
||||||
|
return {"status": "shutting down", "workers_killed": killed}
|
||||||
|
|||||||
@@ -21,6 +21,45 @@ class RayPath:
|
|||||||
is_valid: bool # Does this path exist?
|
is_valid: bool # Does this path exist?
|
||||||
|
|
||||||
|
|
||||||
|
MAX_BUILDINGS_FOR_LINE = 100
|
||||||
|
MAX_BUILDINGS_FOR_REFLECTION = 100
|
||||||
|
MAX_DISTANCE_FROM_PATH = 500 # meters
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_buildings_by_distance(buildings, tx_point, rx_point, max_count=100, max_distance=500):
|
||||||
|
"""Filter buildings to only those close to the TX-RX path.
|
||||||
|
|
||||||
|
Sort by distance to path midpoint, filter by max_distance, take top max_count.
|
||||||
|
Uses squared Euclidean distance (no sqrt) for speed.
|
||||||
|
"""
|
||||||
|
if len(buildings) <= max_count:
|
||||||
|
return buildings
|
||||||
|
|
||||||
|
mid_lat = (tx_point[0] + rx_point[0]) / 2
|
||||||
|
mid_lon = (tx_point[1] + rx_point[1]) / 2
|
||||||
|
|
||||||
|
max_dist_sq = max_distance * max_distance
|
||||||
|
|
||||||
|
def dist_sq_to_midpoint(building):
|
||||||
|
# Building centroid from geometry or fallback to midpoint
|
||||||
|
geom = building.geometry
|
||||||
|
if geom:
|
||||||
|
blat = sum(p[1] for p in geom) / len(geom)
|
||||||
|
blon = sum(p[0] for p in geom) / len(geom)
|
||||||
|
else:
|
||||||
|
blat, blon = mid_lat, mid_lon
|
||||||
|
dlat = (blat - mid_lat) * 111000
|
||||||
|
dlon = (blon - mid_lon) * 111000 * 0.7 # rough cos correction
|
||||||
|
return dlat * dlat + dlon * dlon
|
||||||
|
|
||||||
|
scored = [(b, dist_sq_to_midpoint(b)) for b in buildings]
|
||||||
|
scored.sort(key=lambda x: x[1])
|
||||||
|
|
||||||
|
# Filter by max distance and take top N
|
||||||
|
filtered = [b for b, d in scored if d <= max_dist_sq]
|
||||||
|
return filtered[:max_count]
|
||||||
|
|
||||||
|
|
||||||
class DominantPathService:
|
class DominantPathService:
|
||||||
"""
|
"""
|
||||||
Find dominant propagation paths (2-3 strongest)
|
Find dominant propagation paths (2-3 strongest)
|
||||||
@@ -420,6 +459,15 @@ class DominantPathService:
|
|||||||
else:
|
else:
|
||||||
line_buildings = buildings
|
line_buildings = buildings
|
||||||
|
|
||||||
|
# Filter to limit building count — prevents 600+ buildings per point
|
||||||
|
original_line_count = len(line_buildings)
|
||||||
|
line_buildings = _filter_buildings_by_distance(
|
||||||
|
line_buildings,
|
||||||
|
(tx_lat, tx_lon), (rx_lat, rx_lon),
|
||||||
|
max_count=MAX_BUILDINGS_FOR_LINE,
|
||||||
|
max_distance=MAX_DISTANCE_FROM_PATH,
|
||||||
|
)
|
||||||
|
|
||||||
direct = self._check_direct_path_sync(
|
direct = self._check_direct_path_sync(
|
||||||
tx_lat, tx_lon, tx_height,
|
tx_lat, tx_lon, tx_height,
|
||||||
rx_lat, rx_lon, rx_height,
|
rx_lat, rx_lon, rx_height,
|
||||||
@@ -442,13 +490,22 @@ class DominantPathService:
|
|||||||
else:
|
else:
|
||||||
reflection_buildings = buildings
|
reflection_buildings = buildings
|
||||||
|
|
||||||
|
# Filter reflection buildings to limit count
|
||||||
|
original_refl_count = len(reflection_buildings)
|
||||||
|
reflection_buildings = _filter_buildings_by_distance(
|
||||||
|
reflection_buildings,
|
||||||
|
(tx_lat, tx_lon), (rx_lat, rx_lon),
|
||||||
|
max_count=MAX_BUILDINGS_FOR_REFLECTION,
|
||||||
|
max_distance=MAX_DISTANCE_FROM_PATH,
|
||||||
|
)
|
||||||
|
|
||||||
# Log building counts for first 3 points so user can verify filtering
|
# Log building counts for first 3 points so user can verify filtering
|
||||||
DominantPathService._log_count += 1
|
DominantPathService._log_count += 1
|
||||||
if DominantPathService._log_count <= 3:
|
if DominantPathService._log_count <= 3:
|
||||||
import sys
|
import sys
|
||||||
msg = (f"[DOMINANT_PATH] Point #{DominantPathService._log_count}: "
|
msg = (f"[DOMINANT_PATH] Point #{DominantPathService._log_count}: "
|
||||||
f"line_bldgs={len(line_buildings)}, "
|
f"line_bldgs={len(line_buildings)} (from {original_line_count}), "
|
||||||
f"refl_bldgs={len(reflection_buildings)}, "
|
f"refl_bldgs={len(reflection_buildings)} (from {original_refl_count}), "
|
||||||
f"total_available={len(buildings)}, "
|
f"total_available={len(buildings)}, "
|
||||||
f"spatial_idx={'YES' if spatial_idx else 'NO'}, "
|
f"spatial_idx={'YES' if spatial_idx else 'NO'}, "
|
||||||
f"early_exit={'YES' if direct and direct.is_valid and not direct.materials_crossed else 'NO'}")
|
f"early_exit={'YES' if direct and direct.is_valid and not direct.materials_crossed else 'NO'}")
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ import threading
|
|||||||
import multiprocessing as mp
|
import multiprocessing as mp
|
||||||
from typing import List, Dict, Tuple, Any, Optional, Callable
|
from typing import List, Dict, Tuple, Any, Optional, Callable
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
|
||||||
# ── Cancellation token ──
|
# ── Cancellation token ──
|
||||||
@@ -46,6 +47,46 @@ class CancellationToken:
|
|||||||
return self._event.is_set()
|
return self._event.is_set()
|
||||||
|
|
||||||
|
|
||||||
|
# ── Worker process cleanup ──
|
||||||
|
|
||||||
|
def _kill_worker_processes() -> int:
|
||||||
|
"""Kill all child processes of the current process.
|
||||||
|
|
||||||
|
Uses psutil to find and terminate/kill child processes that may be
|
||||||
|
orphaned after ProcessPoolExecutor timeout or cancellation.
|
||||||
|
Returns the number of children killed.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
current = psutil.Process(os.getpid())
|
||||||
|
children = current.children(recursive=True)
|
||||||
|
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if not children:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
count = len(children)
|
||||||
|
|
||||||
|
# First: graceful terminate
|
||||||
|
for child in children:
|
||||||
|
try:
|
||||||
|
child.terminate()
|
||||||
|
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Wait up to 3 seconds for graceful exit
|
||||||
|
gone, alive = psutil.wait_procs(children, timeout=3)
|
||||||
|
|
||||||
|
# Force kill survivors
|
||||||
|
for p in alive:
|
||||||
|
try:
|
||||||
|
p.kill()
|
||||||
|
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return count
|
||||||
|
|
||||||
|
|
||||||
# ── Try to import Ray ──
|
# ── Try to import Ray ──
|
||||||
|
|
||||||
RAY_AVAILABLE = False
|
RAY_AVAILABLE = False
|
||||||
@@ -426,10 +467,12 @@ def _calculate_with_process_pool(
|
|||||||
|
|
||||||
t_calc = time.time()
|
t_calc = time.time()
|
||||||
all_results: List[Dict] = []
|
all_results: List[Dict] = []
|
||||||
|
pool = None
|
||||||
|
|
||||||
with ProcessPoolExecutor(max_workers=num_workers) as executor:
|
try:
|
||||||
|
pool = ProcessPoolExecutor(max_workers=num_workers)
|
||||||
futures = {
|
futures = {
|
||||||
executor.submit(
|
pool.submit(
|
||||||
_pool_worker_process_chunk,
|
_pool_worker_process_chunk,
|
||||||
(chunk, terrain_cache, buildings, osm_data, config),
|
(chunk, terrain_cache, buildings, osm_data, config),
|
||||||
): i
|
): i
|
||||||
@@ -460,6 +503,17 @@ def _calculate_with_process_pool(
|
|||||||
log_fn(f"Progress: {completed_chunks}/{len(chunks)} chunks ({pct}%) — "
|
log_fn(f"Progress: {completed_chunks}/{len(chunks)} chunks ({pct}%) — "
|
||||||
f"{pts} pts, {rate:.0f} pts/s, ETA {eta:.0f}s")
|
f"{pts} pts, {rate:.0f} pts/s, ETA {eta:.0f}s")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log_fn(f"ProcessPool error: {e}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# CRITICAL: Always cleanup pool and orphaned workers
|
||||||
|
if pool:
|
||||||
|
pool.shutdown(wait=False, cancel_futures=True)
|
||||||
|
killed = _kill_worker_processes()
|
||||||
|
if killed > 0:
|
||||||
|
log_fn(f"Killed {killed} orphaned worker processes")
|
||||||
|
|
||||||
calc_time = time.time() - t_calc
|
calc_time = time.time() - t_calc
|
||||||
log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
|
log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
|
||||||
f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)")
|
f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)")
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ requests==2.31.0
|
|||||||
httpx==0.27.0
|
httpx==0.27.0
|
||||||
aiosqlite>=0.19.0
|
aiosqlite>=0.19.0
|
||||||
sqlalchemy>=2.0.0
|
sqlalchemy>=2.0.0
|
||||||
|
psutil>=5.9.0
|
||||||
ray[default]>=2.9.0
|
ray[default]>=2.9.0
|
||||||
# GPU acceleration (optional — install cupy-cuda12x for NVIDIA GPU support)
|
# GPU acceleration (optional — install cupy-cuda12x for NVIDIA GPU support)
|
||||||
# cupy-cuda12x>=13.0.0
|
# cupy-cuda12x>=13.0.0
|
||||||
|
|||||||
@@ -276,7 +276,10 @@ function createMainWindow() {
|
|||||||
store.set('windowState', bounds);
|
store.set('windowState', bounds);
|
||||||
} catch (_e) {}
|
} catch (_e) {}
|
||||||
isQuitting = true;
|
isQuitting = true;
|
||||||
|
// Graceful shutdown is async but we also do sync kill as safety net
|
||||||
|
gracefulShutdown().catch(() => {});
|
||||||
killBackend();
|
killBackend();
|
||||||
|
killAllBackendProcesses();
|
||||||
});
|
});
|
||||||
|
|
||||||
// Load frontend
|
// Load frontend
|
||||||
@@ -360,6 +363,67 @@ function killBackend() {
|
|||||||
log(`[KILL] Backend cleanup complete (PID was ${pid})`);
|
log(`[KILL] Backend cleanup complete (PID was ${pid})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Nuclear option: kill ALL rfcp-server processes by name.
|
||||||
|
* This catches orphaned workers that PID-based kill misses.
|
||||||
|
*/
|
||||||
|
function killAllBackendProcesses() {
|
||||||
|
log('[KILL] killAllBackendProcesses() — killing by process name...');
|
||||||
|
|
||||||
|
if (process.platform === 'win32') {
|
||||||
|
try {
|
||||||
|
execSync('taskkill /F /IM rfcp-server.exe /T', {
|
||||||
|
stdio: 'ignore',
|
||||||
|
timeout: 5000
|
||||||
|
});
|
||||||
|
log('[KILL] taskkill /IM rfcp-server.exe completed');
|
||||||
|
} catch (_e) {
|
||||||
|
// Error means no processes found — OK
|
||||||
|
log('[KILL] No rfcp-server.exe processes found (or already killed)');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
execSync('pkill -9 -f rfcp-server', {
|
||||||
|
stdio: 'ignore',
|
||||||
|
timeout: 5000
|
||||||
|
});
|
||||||
|
log('[KILL] pkill rfcp-server completed');
|
||||||
|
} catch (_e) {
|
||||||
|
log('[KILL] No rfcp-server processes found');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Graceful shutdown: ask backend to clean up, then force kill everything.
|
||||||
|
*/
|
||||||
|
async function gracefulShutdown() {
|
||||||
|
log('[SHUTDOWN] Requesting graceful shutdown...');
|
||||||
|
|
||||||
|
// Step 1: Ask backend to clean up workers and exit
|
||||||
|
try {
|
||||||
|
const controller = new AbortController();
|
||||||
|
const timeout = setTimeout(() => controller.abort(), 2000);
|
||||||
|
await fetch('http://127.0.0.1:8888/api/system/shutdown', {
|
||||||
|
method: 'POST',
|
||||||
|
signal: controller.signal
|
||||||
|
});
|
||||||
|
clearTimeout(timeout);
|
||||||
|
log('[SHUTDOWN] Backend acknowledged shutdown');
|
||||||
|
} catch (_e) {
|
||||||
|
log('[SHUTDOWN] Backend did not respond — force killing');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: Wait briefly for graceful exit
|
||||||
|
await new Promise(r => setTimeout(r, 500));
|
||||||
|
|
||||||
|
// Step 3: PID-based kill (catches the main process)
|
||||||
|
killBackend();
|
||||||
|
|
||||||
|
// Step 4: Name-based kill (catches orphaned workers)
|
||||||
|
killAllBackendProcesses();
|
||||||
|
}
|
||||||
|
|
||||||
// ── App lifecycle ──────────────────────────────────────────────────
|
// ── App lifecycle ──────────────────────────────────────────────────
|
||||||
|
|
||||||
app.whenReady().then(async () => {
|
app.whenReady().then(async () => {
|
||||||
@@ -393,6 +457,7 @@ app.on('window-all-closed', () => {
|
|||||||
log('[CLOSE] window-all-closed fired');
|
log('[CLOSE] window-all-closed fired');
|
||||||
isQuitting = true;
|
isQuitting = true;
|
||||||
killBackend();
|
killBackend();
|
||||||
|
killAllBackendProcesses();
|
||||||
|
|
||||||
if (process.platform !== 'darwin') {
|
if (process.platform !== 'darwin') {
|
||||||
app.quit();
|
app.quit();
|
||||||
@@ -409,11 +474,13 @@ app.on('before-quit', () => {
|
|||||||
log('[CLOSE] before-quit fired');
|
log('[CLOSE] before-quit fired');
|
||||||
isQuitting = true;
|
isQuitting = true;
|
||||||
killBackend();
|
killBackend();
|
||||||
|
killAllBackendProcesses();
|
||||||
});
|
});
|
||||||
|
|
||||||
app.on('will-quit', () => {
|
app.on('will-quit', () => {
|
||||||
log('[CLOSE] will-quit fired');
|
log('[CLOSE] will-quit fired');
|
||||||
killBackend();
|
killBackend();
|
||||||
|
killAllBackendProcesses();
|
||||||
|
|
||||||
if (backendLogStream) {
|
if (backendLogStream) {
|
||||||
try { backendLogStream.end(); } catch (_e) {}
|
try { backendLogStream.end(); } catch (_e) {}
|
||||||
@@ -427,6 +494,7 @@ process.on('exit', () => {
|
|||||||
console.log(`[KILL] process.exit handler, backendPid=${backendPid}`);
|
console.log(`[KILL] process.exit handler, backendPid=${backendPid}`);
|
||||||
} catch (_e) { /* log stream may be closed */ }
|
} catch (_e) { /* log stream may be closed */ }
|
||||||
|
|
||||||
|
// PID-based kill
|
||||||
if (backendPid) {
|
if (backendPid) {
|
||||||
try {
|
try {
|
||||||
if (process.platform === 'win32') {
|
if (process.platform === 'win32') {
|
||||||
@@ -438,6 +506,24 @@ process.on('exit', () => {
|
|||||||
// Best effort
|
// Best effort
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Name-based kill — catches orphaned workers
|
||||||
|
killAllBackendProcesses();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Handle SIGINT/SIGTERM (Ctrl+C, system shutdown)
|
||||||
|
process.on('SIGINT', () => {
|
||||||
|
try { log('[SIGNAL] SIGINT received'); } catch (_e) {}
|
||||||
|
killBackend();
|
||||||
|
killAllBackendProcesses();
|
||||||
|
process.exit(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
process.on('SIGTERM', () => {
|
||||||
|
try { log('[SIGNAL] SIGTERM received'); } catch (_e) {}
|
||||||
|
killBackend();
|
||||||
|
killAllBackendProcesses();
|
||||||
|
process.exit(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
// ── IPC Handlers ───────────────────────────────────────────────────
|
// ── IPC Handlers ───────────────────────────────────────────────────
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
{"detail":"Calculation timeout (5 min) — try smaller radius or lower resolution"}
|
{"detail":"Calculation timeout (5 min). Cleaned up 6 workers."}
|
||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user