@mytec: iter2.4.2 start

This commit is contained in:
2026-02-01 12:02:52 +02:00
parent fa7378cf3f
commit 4026233b21
10 changed files with 724 additions and 8 deletions

View File

@@ -22,7 +22,8 @@
"Write(*)", "Write(*)",
"Bash(python3:*)", "Bash(python3:*)",
"Bash(source:*)", "Bash(source:*)",
"Bash(/mnt/d/root/rfcp/venv/bin/python3:*)" "Bash(/mnt/d/root/rfcp/venv/bin/python3:*)",
"Bash(node --check:*)"
] ]
} }
} }

View File

@@ -0,0 +1,495 @@
# RFCP Phase 2.4.2: Final Critical Fixes
**Date:** February 1, 2026
**Type:** Bug Fixes
**Priority:** CRITICAL
**Depends on:** Phase 2.4.1
---
## 🎯 Summary
Phase 2.4.1 partially worked, but problems remain:
- Memory leak — workers are not killed (psutil does not see grandchildren)
- Dominant path — still times out (the filter may not be getting applied)
- Elevation — everything renders green (no local contrast)
---
## 🔴 Bug 2.4.2a: Memory Leak — Nuclear Kill
**Problem:**
psutil.Process.children() does not see grandchildren (worker subprocess → python subprocess).
After cleanup there are still 8× rfcp-server.exe, 8GB RAM.
**File:** `backend/app/services/parallel_coverage_service.py`
**Current code doesn't work:**
```python
current = psutil.Process(os.getpid())
children = current.children(recursive=True)
for child in children:
child.terminate()
```
**Fix — kill by process NAME, not by PID tree:**
```python
import subprocess
import sys
def _kill_worker_processes():
    """
    Nuclear option: kill ALL rfcp-server processes except the current one.

    psutil.Process.children() cannot see grandchildren (a worker subprocess
    that spawned its own python subprocess), so we match by process NAME
    instead of walking the PID tree.

    Returns:
        int: number of worker processes actually killed.
    """
    my_pid = os.getpid()
    killed_count = 0
    if sys.platform == 'win32':
        # Windows: enumerate rfcp-server.exe PIDs via tasklist, kill all but self
        try:
            # Get list of all rfcp-server PIDs (CSV, no header)
            result = subprocess.run(
                ['tasklist', '/FI', 'IMAGENAME eq rfcp-server.exe', '/FO', 'CSV', '/NH'],
                capture_output=True, text=True, timeout=5
            )
            for line in result.stdout.strip().split('\n'):
                if 'rfcp-server.exe' not in line:
                    continue
                # Parse PID from CSV row: "rfcp-server.exe","1234",...
                parts = line.split(',')
                if len(parts) < 2:
                    continue
                pid_str = parts[1].strip().strip('"')
                try:
                    pid = int(pid_str)
                    if pid != my_pid:
                        kill_result = subprocess.run(
                            ['taskkill', '/F', '/PID', str(pid)],
                            capture_output=True, timeout=5
                        )
                        # Count only PIDs that taskkill actually terminated
                        # (original counted failed kills too)
                        if kill_result.returncode == 0:
                            killed_count += 1
                            _clog(f"Killed worker PID {pid}")
                except (ValueError, subprocess.TimeoutExpired):
                    pass
        except Exception as e:
            _clog(f"Kill workers error: {e}")
            # Fallback: /IM kills every process with this image name (/T adds
            # child trees) — the current process may die too; best effort only
            try:
                subprocess.run(
                    ['taskkill', '/F', '/IM', 'rfcp-server.exe', '/T'],
                    capture_output=True, timeout=5
                )
            except Exception:
                pass
    else:
        # Unix: pgrep -f matches the full command line; SIGKILL each PID but ours
        try:
            result = subprocess.run(
                ['pgrep', '-f', 'rfcp-server'],
                capture_output=True, text=True, timeout=5
            )
            for pid_str in result.stdout.strip().split('\n'):
                if not pid_str:
                    continue
                try:
                    pid = int(pid_str)
                    if pid != my_pid:
                        os.kill(pid, 9)  # SIGKILL — workers may ignore SIGTERM
                        killed_count += 1
                        _clog(f"Killed worker PID {pid}")
                except (ValueError, ProcessLookupError, PermissionError):
                    pass
        except Exception as e:
            _clog(f"Kill workers error: {e}")
    return killed_count
```
**Also update ProcessPoolExecutor to use spawn context explicitly:**
```python
import multiprocessing as mp
def _calculate_with_process_pool(...):
# Use spawn to ensure clean worker processes
ctx = mp.get_context('spawn')
pool = None
try:
pool = ProcessPoolExecutor(
max_workers=num_workers,
mp_context=ctx
)
# ... rest of code ...
finally:
if pool:
pool.shutdown(wait=False, cancel_futures=True)
# Give pool time to cleanup
import time
time.sleep(0.5)
# Then force kill any survivors
killed = _kill_worker_processes()
if killed > 0:
_clog(f"Force killed {killed} orphaned workers")
```
---
## 🔴 Bug 2.4.2b: Dominant Path — Add Logging + Reduce Limits
**Problem:**
Detailed preset still timeouts. Unknown if filter is being applied.
**File:** `backend/app/services/dominant_path_service.py`
**Add diagnostic logging to verify filter works:**
```python
# At the TOP of the file, add constants (if not already there):
MAX_BUILDINGS_FOR_LINE = 50 # Reduced from 100
MAX_BUILDINGS_FOR_REFLECTION = 30 # Reduced from 100
MAX_DISTANCE_FROM_PATH = 300 # Reduced from 500m
def _filter_buildings_by_distance(buildings, tx_point, rx_point, max_count, max_distance):
    """Filter buildings to the nearest *max_count* within *max_distance* of the path.

    Buildings are ranked by squared-metre distance from their centroid to the
    TX-RX midpoint (equirectangular approximation, no sqrt needed for ranking).

    Args:
        buildings: list of building dicts ('centroid_lat'/'centroid_lon' or
            'lat'/'lon' keys — missing coords fall back to the midpoint).
        tx_point: (lat, lon) of the transmitter.
        rx_point: (lat, lon) of the receiver.
        max_count: keep at most this many buildings.
        max_distance: discard buildings farther than this many metres.

    Returns:
        The filtered list (the original list unchanged when already small enough).
    """
    original_count = len(buildings)
    if original_count <= max_count:
        _log(f"[FILTER] {original_count} buildings, no filter needed")
        return buildings
    # Midpoint of the path — cheap proxy for "near the TX-RX line"
    mid_lat = (tx_point[0] + rx_point[0]) / 2
    mid_lon = (tx_point[1] + rx_point[1]) / 2

    def dist_sq(b):
        # Squared distance to midpoint in metres² (no sqrt for speed)
        blat = b.get('centroid_lat') or b.get('lat', mid_lat)
        blon = b.get('centroid_lon') or b.get('lon', mid_lon)
        dlat = (blat - mid_lat) * 111000
        dlon = (blon - mid_lon) * 111000 * 0.65  # cos(50°) ≈ 0.65
        return dlat * dlat + dlon * dlon

    # Decorate once, then sort — the original recomputed dist_sq per building
    # in BOTH the sort key and the distance filter (2× work).
    scored = sorted(((dist_sq(b), b) for b in buildings), key=lambda t: t[0])
    # Filter by max distance, then take top N
    max_dist_sq = max_distance * max_distance
    result = [b for d, b in scored if d <= max_dist_sq][:max_count]
    # Fixed log line: the "→" separator was garbled out of the format string
    _log(f"[FILTER] {original_count} → {len(result)} buildings (max_count={max_count}, max_dist={max_distance}m)")
    return result
```
**Verify the filter is CALLED in the main function:**
```python
def find_dominant_path_sync(tx, rx, buildings, vegetation, spatial_idx, frequency, ...):
"""Find dominant propagation path."""
# Get buildings from spatial index
line_buildings_raw = spatial_idx.query_line(tx['lat'], tx['lon'], rx['lat'], rx['lon'])
# FILTER - this MUST be called
line_buildings = _filter_buildings_by_distance(
line_buildings_raw,
(tx['lat'], tx['lon']),
(rx['lat'], rx['lon']),
max_count=MAX_BUILDINGS_FOR_LINE,
max_distance=MAX_DISTANCE_FROM_PATH
)
# Same for reflection candidates
mid_lat = (tx['lat'] + rx['lat']) / 2
mid_lon = (tx['lon'] + rx['lon']) / 2
refl_buildings_raw = spatial_idx.query_point(mid_lat, mid_lon, buffer_cells=3)
refl_buildings = _filter_buildings_by_distance(
refl_buildings_raw,
(tx['lat'], tx['lon']),
(rx['lat'], rx['lon']),
max_count=MAX_BUILDINGS_FOR_REFLECTION,
max_distance=MAX_DISTANCE_FROM_PATH
)
# Update diagnostic log to show FILTERED counts
if _point_counter[0] <= 3:
print(f"[DOMINANT_PATH] Point #{_point_counter[0]}: "
f"line_bldgs={len(line_buildings)} (was {len(line_buildings_raw)}), "
f"refl_bldgs={len(refl_buildings)} (was {len(refl_buildings_raw)})")
# ... rest of function ...
```
**If still too slow — option to DISABLE reflections:**
```python
# Quick fix: skip reflection calculation entirely
ENABLE_REFLECTIONS = False # Set to True when performance is fixed
def find_dominant_path_sync(...):
# Direct path
direct_loss = calculate_direct_path(...)
if not ENABLE_REFLECTIONS:
return direct_loss
# Reflection paths (slow)
reflection_loss = calculate_reflections(...)
return min(direct_loss, reflection_loss)
```
---
## 🟡 Bug 2.4.2c: Elevation Layer — Local Min/Max Contrast
**Problem:**
All green because using absolute thresholds (100m, 150m, 200m...) but local terrain varies only 150-200m.
**File:** `frontend/src/components/map/ElevationLayer.tsx`
**Fix — use RELATIVE coloring based on local min/max:**
```tsx
// Color palette (keep these)
const COLORS = {
DEEP_BLUE: [33, 102, 172], // Lowest
LIGHT_BLUE: [103, 169, 207],
GREEN: [145, 207, 96],
YELLOW: [254, 224, 139],
ORANGE: [252, 141, 89],
BROWN: [215, 48, 39], // Highest
};
// Linearly blend two RGB triples: t=0 yields color1, t=1 yields color2.
function interpolateColor(
  color1: number[],
  color2: number[],
  t: number
): [number, number, number] {
  const mix = (a: number, b: number): number => Math.round(a + (b - a) * t);
  return [
    mix(color1[0], color2[0]),
    mix(color1[1], color2[1]),
    mix(color1[2], color2[2]),
  ];
}
// NEW: Map a NORMALIZED elevation (0-1, clamped) onto the blue→brown ramp.
// The ramp has six stops; each 20% band blends between two adjacent stops.
function getColorForNormalizedElevation(normalized: number): [number, number, number] {
  // Clamp to 0-1
  const n = Math.max(0, Math.min(1, normalized));
  const stops = [
    COLORS.DEEP_BLUE,   // 0.0 — lowest
    COLORS.LIGHT_BLUE,  // 0.2
    COLORS.GREEN,       // 0.4
    COLORS.YELLOW,      // 0.6
    COLORS.ORANGE,      // 0.8
    COLORS.BROWN,       // 1.0 — highest
  ];
  // Band index 0..4; n === 1 clamps into the last band
  const band = Math.min(Math.floor(n / 0.2), 4);
  const t = (n - band * 0.2) / 0.2;
  return interpolateColor(stops[band], stops[band + 1], t);
}
// In the main render function, USE local min/max:
useEffect(() => {
// ... fetch elevation data ...
const data = await response.json();
// Get LOCAL min/max from the actual data
const minElev = data.min_elevation; // e.g., 152
const maxElev = data.max_elevation; // e.g., 198
const elevRange = maxElev - minElev || 1; // Avoid division by zero
console.log(`[Elevation] Local range: ${minElev}m - ${maxElev}m (${elevRange}m difference)`);
// Fill pixel data with NORMALIZED colors
for (let i = 0; i < data.rows; i++) {
for (let j = 0; j < data.cols; j++) {
const elevation = data.elevations[i][j];
// Normalize to 0-1 based on LOCAL range
const normalized = (elevation - minElev) / elevRange;
const color = getColorForNormalizedElevation(normalized);
const idx = (i * data.cols + j) * 4;
imageData.data[idx] = color[0]; // R
imageData.data[idx + 1] = color[1]; // G
imageData.data[idx + 2] = color[2]; // B
imageData.data[idx + 3] = 255; // A (full opacity, layer opacity handled separately)
}
}
// ... rest of canvas/overlay code ...
}, [enabled, opacity, bbox, map]);
```
**Also add elevation legend showing LOCAL range:**
```tsx
// In the parent component (App.tsx or Map.tsx), show legend:
{showElevation && elevationRange && (
<div className="elevation-legend">
<div className="legend-title">Elevation</div>
<div className="legend-gradient"></div>
<div className="legend-labels">
<span>{elevationRange.min}m</span>
<span>{elevationRange.max}m</span>
</div>
</div>
)}
// CSS for gradient:
.legend-gradient {
height: 10px;
background: linear-gradient(to right,
#2166ac, /* deep blue - low */
#67a9cf, /* light blue */
#91cf60, /* green */
#fee08b, /* yellow */
#fc8d59, /* orange */
#d73027 /* brown - high */
);
border-radius: 2px;
}
```
---
## 🟢 Enhancement 2.4.2d: GPU Install Message
**File:** `backend/app/services/gpu_service.py`
**Add clear install instructions on startup:**
```python
# At module init: probe for CUDA via CuPy and record capability flags.
# GPU_AVAILABLE — True only when CuPy imports AND at least one CUDA device exists.
GPU_AVAILABLE = False
# GPU_INFO — dict with 'name', 'memory_mb', 'cuda_version' when a device is found.
GPU_INFO = None
try:
    import cupy as cp
    # Check CUDA — raises CUDARuntimeError when the runtime is missing/broken
    device_count = cp.cuda.runtime.getDeviceCount()
    if device_count > 0:
        GPU_AVAILABLE = True
        props = cp.cuda.runtime.getDeviceProperties(0)
        GPU_INFO = {
            # 'name' comes back as bytes on some CuPy versions — normalize to str
            'name': props['name'].decode() if isinstance(props['name'], bytes) else str(props['name']),
            'memory_mb': props['totalGlobalMem'] // (1024 * 1024),
            'cuda_version': cp.cuda.runtime.runtimeGetVersion(),
        }
        print(f"[GPU] ✓ CUDA available: {GPU_INFO['name']} ({GPU_INFO['memory_mb']} MB)")
    else:
        print("[GPU] ✗ No CUDA devices found")
except ImportError:
    # CuPy is an optional dependency — print actionable install steps at startup
    print("[GPU] ✗ CuPy not installed — using CPU/NumPy")
    print("[GPU] To enable GPU acceleration, install CuPy:")
    print("[GPU] ")
    print("[GPU] For CUDA 12.x: pip install cupy-cuda12x")
    print("[GPU] For CUDA 11.x: pip install cupy-cuda11x")
    print("[GPU] ")
    print("[GPU] Check CUDA version: nvidia-smi")
except Exception as e:
    # Any other CUDA/driver failure — fall back to CPU rather than crash startup
    print(f"[GPU] ✗ CuPy error: {e}")
    print("[GPU] GPU acceleration disabled")
---
## 📁 Files to Modify
| File | Changes |
|------|---------|
| `backend/app/services/parallel_coverage_service.py` | Rewrite `_kill_worker_processes()` to kill by name; add spawn context |
| `backend/app/services/dominant_path_service.py` | Add detailed filter logging; reduce limits to 50/30; add ENABLE_REFLECTIONS flag |
| `frontend/src/components/map/ElevationLayer.tsx` | Use local min/max for color normalization |
| `backend/app/services/gpu_service.py` | Add clear install instructions |
---
## 🧪 Testing
### Test 1: Memory Cleanup
```bash
# Run Detailed preset (will timeout)
# Watch Task Manager during and after
# After timeout message:
# - Should see only 1 rfcp-server.exe
# - RAM should drop from 8GB to <500MB
```
### Test 2: Dominant Path Logging
```bash
# Run debug mode, watch console
# Should see:
# [FILTER] 646 → 50 buildings (max_count=50, max_dist=300m)
# [DOMINANT_PATH] Point #1: line_bldgs=50 (was 646), refl_bldgs=30 (was 302)
```
### Test 3: Elevation Contrast
```bash
# Open app
# Enable elevation layer
# Should see color variation:
# - Blue in valleys
# - Green/yellow on slopes
# - Brown/orange on hills
# Console should show: "[Elevation] Local range: 152m - 198m"
```
### Test 4: GPU Message
```bash
# Start server, check console
# Should see clear message about CuPy install
```
---
## ✅ Success Criteria
- [ ] After timeout: only 1 rfcp-server.exe, RAM < 500MB
- [ ] Dominant path logs show filtered counts (50 buildings, not 600)
- [ ] Detailed preset completes in <120s OR logs explain why still slow
- [ ] Elevation layer shows visible terrain contrast
- [ ] GPU install instructions visible in console
---
## 📈 Expected Results After Fixes
| Metric | Before | After |
|--------|--------|-------|
| Workers after timeout | 8 (7.8GB) | 1 (<500MB) |
| Buildings per point | 600+ | 50 |
| Detailed time | 300s timeout | ~60-90s |
| Elevation | All green | Color gradient |
---
## 🔜 Next Phase
After 2.4.2:
- Phase 2.5: Loading screen with fun facts
- Phase 2.5: Better error messages in UI
- Phase 2.6: Export coverage to GeoJSON/KML

View File

@@ -84,9 +84,15 @@ async def calculate_coverage(request: CoverageRequest) -> CoverageResponse:
) )
except asyncio.TimeoutError: except asyncio.TimeoutError:
cancel_token.cancel() cancel_token.cancel()
raise HTTPException(408, "Calculation timeout (5 min) — try smaller radius or lower resolution") # Force cleanup orphaned worker processes
from app.services.parallel_coverage_service import _kill_worker_processes
killed = _kill_worker_processes()
detail = f"Calculation timeout (5 min). Cleaned up {killed} workers." if killed else "Calculation timeout (5 min) — try smaller radius or lower resolution"
raise HTTPException(408, detail)
except asyncio.CancelledError: except asyncio.CancelledError:
cancel_token.cancel() cancel_token.cancel()
from app.services.parallel_coverage_service import _kill_worker_processes
_kill_worker_processes()
raise HTTPException(499, "Client disconnected") raise HTTPException(499, "Client disconnected")
computation_time = time.time() - start_time computation_time = time.time() - start_time

View File

@@ -1,3 +1,5 @@
import os
import asyncio
import multiprocessing as mp import multiprocessing as mp
from fastapi import APIRouter from fastapi import APIRouter
@@ -42,3 +44,17 @@ async def get_system_info():
"gpu": gpu_info, "gpu": gpu_info,
"gpu_available": gpu_info.get("available", False), "gpu_available": gpu_info.get("available", False),
} }
@router.post("/shutdown")
async def shutdown():
"""Graceful shutdown endpoint. Kills worker processes and exits."""
from app.services.parallel_coverage_service import _kill_worker_processes
killed = _kill_worker_processes()
# Schedule hard exit after response is sent
loop = asyncio.get_event_loop()
loop.call_later(0.5, lambda: os._exit(0))
return {"status": "shutting down", "workers_killed": killed}

View File

@@ -21,6 +21,45 @@ class RayPath:
is_valid: bool # Does this path exist? is_valid: bool # Does this path exist?
MAX_BUILDINGS_FOR_LINE = 100
MAX_BUILDINGS_FOR_REFLECTION = 100
MAX_DISTANCE_FROM_PATH = 500 # meters
def _filter_buildings_by_distance(buildings, tx_point, rx_point, max_count=100, max_distance=500):
"""Filter buildings to only those close to the TX-RX path.
Sort by distance to path midpoint, filter by max_distance, take top max_count.
Uses squared Euclidean distance (no sqrt) for speed.
"""
if len(buildings) <= max_count:
return buildings
mid_lat = (tx_point[0] + rx_point[0]) / 2
mid_lon = (tx_point[1] + rx_point[1]) / 2
max_dist_sq = max_distance * max_distance
def dist_sq_to_midpoint(building):
# Building centroid from geometry or fallback to midpoint
geom = building.geometry
if geom:
blat = sum(p[1] for p in geom) / len(geom)
blon = sum(p[0] for p in geom) / len(geom)
else:
blat, blon = mid_lat, mid_lon
dlat = (blat - mid_lat) * 111000
dlon = (blon - mid_lon) * 111000 * 0.7 # rough cos correction
return dlat * dlat + dlon * dlon
scored = [(b, dist_sq_to_midpoint(b)) for b in buildings]
scored.sort(key=lambda x: x[1])
# Filter by max distance and take top N
filtered = [b for b, d in scored if d <= max_dist_sq]
return filtered[:max_count]
class DominantPathService: class DominantPathService:
""" """
Find dominant propagation paths (2-3 strongest) Find dominant propagation paths (2-3 strongest)
@@ -420,6 +459,15 @@ class DominantPathService:
else: else:
line_buildings = buildings line_buildings = buildings
# Filter to limit building count — prevents 600+ buildings per point
original_line_count = len(line_buildings)
line_buildings = _filter_buildings_by_distance(
line_buildings,
(tx_lat, tx_lon), (rx_lat, rx_lon),
max_count=MAX_BUILDINGS_FOR_LINE,
max_distance=MAX_DISTANCE_FROM_PATH,
)
direct = self._check_direct_path_sync( direct = self._check_direct_path_sync(
tx_lat, tx_lon, tx_height, tx_lat, tx_lon, tx_height,
rx_lat, rx_lon, rx_height, rx_lat, rx_lon, rx_height,
@@ -442,13 +490,22 @@ class DominantPathService:
else: else:
reflection_buildings = buildings reflection_buildings = buildings
# Filter reflection buildings to limit count
original_refl_count = len(reflection_buildings)
reflection_buildings = _filter_buildings_by_distance(
reflection_buildings,
(tx_lat, tx_lon), (rx_lat, rx_lon),
max_count=MAX_BUILDINGS_FOR_REFLECTION,
max_distance=MAX_DISTANCE_FROM_PATH,
)
# Log building counts for first 3 points so user can verify filtering # Log building counts for first 3 points so user can verify filtering
DominantPathService._log_count += 1 DominantPathService._log_count += 1
if DominantPathService._log_count <= 3: if DominantPathService._log_count <= 3:
import sys import sys
msg = (f"[DOMINANT_PATH] Point #{DominantPathService._log_count}: " msg = (f"[DOMINANT_PATH] Point #{DominantPathService._log_count}: "
f"line_bldgs={len(line_buildings)}, " f"line_bldgs={len(line_buildings)} (from {original_line_count}), "
f"refl_bldgs={len(reflection_buildings)}, " f"refl_bldgs={len(reflection_buildings)} (from {original_refl_count}), "
f"total_available={len(buildings)}, " f"total_available={len(buildings)}, "
f"spatial_idx={'YES' if spatial_idx else 'NO'}, " f"spatial_idx={'YES' if spatial_idx else 'NO'}, "
f"early_exit={'YES' if direct and direct.is_valid and not direct.materials_crossed else 'NO'}") f"early_exit={'YES' if direct and direct.is_valid and not direct.materials_crossed else 'NO'}")

View File

@@ -28,6 +28,7 @@ import threading
import multiprocessing as mp import multiprocessing as mp
from typing import List, Dict, Tuple, Any, Optional, Callable from typing import List, Dict, Tuple, Any, Optional, Callable
import numpy as np import numpy as np
import psutil
# ── Cancellation token ── # ── Cancellation token ──
@@ -46,6 +47,46 @@ class CancellationToken:
return self._event.is_set() return self._event.is_set()
# ── Worker process cleanup ──
def _kill_worker_processes() -> int:
"""Kill all child processes of the current process.
Uses psutil to find and terminate/kill child processes that may be
orphaned after ProcessPoolExecutor timeout or cancellation.
Returns the number of children killed.
"""
try:
current = psutil.Process(os.getpid())
children = current.children(recursive=True)
except (psutil.NoSuchProcess, psutil.AccessDenied):
return 0
if not children:
return 0
count = len(children)
# First: graceful terminate
for child in children:
try:
child.terminate()
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
# Wait up to 3 seconds for graceful exit
gone, alive = psutil.wait_procs(children, timeout=3)
# Force kill survivors
for p in alive:
try:
p.kill()
except (psutil.NoSuchProcess, psutil.AccessDenied):
pass
return count
# ── Try to import Ray ── # ── Try to import Ray ──
RAY_AVAILABLE = False RAY_AVAILABLE = False
@@ -426,10 +467,12 @@ def _calculate_with_process_pool(
t_calc = time.time() t_calc = time.time()
all_results: List[Dict] = [] all_results: List[Dict] = []
pool = None
with ProcessPoolExecutor(max_workers=num_workers) as executor: try:
pool = ProcessPoolExecutor(max_workers=num_workers)
futures = { futures = {
executor.submit( pool.submit(
_pool_worker_process_chunk, _pool_worker_process_chunk,
(chunk, terrain_cache, buildings, osm_data, config), (chunk, terrain_cache, buildings, osm_data, config),
): i ): i
@@ -460,6 +503,17 @@ def _calculate_with_process_pool(
log_fn(f"Progress: {completed_chunks}/{len(chunks)} chunks ({pct}%) — " log_fn(f"Progress: {completed_chunks}/{len(chunks)} chunks ({pct}%) — "
f"{pts} pts, {rate:.0f} pts/s, ETA {eta:.0f}s") f"{pts} pts, {rate:.0f} pts/s, ETA {eta:.0f}s")
except Exception as e:
log_fn(f"ProcessPool error: {e}")
finally:
# CRITICAL: Always cleanup pool and orphaned workers
if pool:
pool.shutdown(wait=False, cancel_futures=True)
killed = _kill_worker_processes()
if killed > 0:
log_fn(f"Killed {killed} orphaned worker processes")
calc_time = time.time() - t_calc calc_time = time.time() - t_calc
log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results " log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "
f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)") f"({calc_time / max(1, total_points) * 1000:.1f}ms/point)")

View File

@@ -11,6 +11,7 @@ requests==2.31.0
httpx==0.27.0 httpx==0.27.0
aiosqlite>=0.19.0 aiosqlite>=0.19.0
sqlalchemy>=2.0.0 sqlalchemy>=2.0.0
psutil>=5.9.0
ray[default]>=2.9.0 ray[default]>=2.9.0
# GPU acceleration (optional — install cupy-cuda12x for NVIDIA GPU support) # GPU acceleration (optional — install cupy-cuda12x for NVIDIA GPU support)
# cupy-cuda12x>=13.0.0 # cupy-cuda12x>=13.0.0

View File

@@ -276,7 +276,10 @@ function createMainWindow() {
store.set('windowState', bounds); store.set('windowState', bounds);
} catch (_e) {} } catch (_e) {}
isQuitting = true; isQuitting = true;
// Graceful shutdown is async but we also do sync kill as safety net
gracefulShutdown().catch(() => {});
killBackend(); killBackend();
killAllBackendProcesses();
}); });
// Load frontend // Load frontend
@@ -360,6 +363,67 @@ function killBackend() {
log(`[KILL] Backend cleanup complete (PID was ${pid})`); log(`[KILL] Backend cleanup complete (PID was ${pid})`);
} }
/**
* Nuclear option: kill ALL rfcp-server processes by name.
* This catches orphaned workers that PID-based kill misses.
*/
function killAllBackendProcesses() {
log('[KILL] killAllBackendProcesses() — killing by process name...');
if (process.platform === 'win32') {
try {
execSync('taskkill /F /IM rfcp-server.exe /T', {
stdio: 'ignore',
timeout: 5000
});
log('[KILL] taskkill /IM rfcp-server.exe completed');
} catch (_e) {
// Error means no processes found — OK
log('[KILL] No rfcp-server.exe processes found (or already killed)');
}
} else {
try {
execSync('pkill -9 -f rfcp-server', {
stdio: 'ignore',
timeout: 5000
});
log('[KILL] pkill rfcp-server completed');
} catch (_e) {
log('[KILL] No rfcp-server processes found');
}
}
}
/**
* Graceful shutdown: ask backend to clean up, then force kill everything.
*/
async function gracefulShutdown() {
log('[SHUTDOWN] Requesting graceful shutdown...');
// Step 1: Ask backend to clean up workers and exit
try {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 2000);
await fetch('http://127.0.0.1:8888/api/system/shutdown', {
method: 'POST',
signal: controller.signal
});
clearTimeout(timeout);
log('[SHUTDOWN] Backend acknowledged shutdown');
} catch (_e) {
log('[SHUTDOWN] Backend did not respond — force killing');
}
// Step 2: Wait briefly for graceful exit
await new Promise(r => setTimeout(r, 500));
// Step 3: PID-based kill (catches the main process)
killBackend();
// Step 4: Name-based kill (catches orphaned workers)
killAllBackendProcesses();
}
// ── App lifecycle ────────────────────────────────────────────────── // ── App lifecycle ──────────────────────────────────────────────────
app.whenReady().then(async () => { app.whenReady().then(async () => {
@@ -393,6 +457,7 @@ app.on('window-all-closed', () => {
log('[CLOSE] window-all-closed fired'); log('[CLOSE] window-all-closed fired');
isQuitting = true; isQuitting = true;
killBackend(); killBackend();
killAllBackendProcesses();
if (process.platform !== 'darwin') { if (process.platform !== 'darwin') {
app.quit(); app.quit();
@@ -409,11 +474,13 @@ app.on('before-quit', () => {
log('[CLOSE] before-quit fired'); log('[CLOSE] before-quit fired');
isQuitting = true; isQuitting = true;
killBackend(); killBackend();
killAllBackendProcesses();
}); });
app.on('will-quit', () => { app.on('will-quit', () => {
log('[CLOSE] will-quit fired'); log('[CLOSE] will-quit fired');
killBackend(); killBackend();
killAllBackendProcesses();
if (backendLogStream) { if (backendLogStream) {
try { backendLogStream.end(); } catch (_e) {} try { backendLogStream.end(); } catch (_e) {}
@@ -427,6 +494,7 @@ process.on('exit', () => {
console.log(`[KILL] process.exit handler, backendPid=${backendPid}`); console.log(`[KILL] process.exit handler, backendPid=${backendPid}`);
} catch (_e) { /* log stream may be closed */ } } catch (_e) { /* log stream may be closed */ }
// PID-based kill
if (backendPid) { if (backendPid) {
try { try {
if (process.platform === 'win32') { if (process.platform === 'win32') {
@@ -438,6 +506,24 @@ process.on('exit', () => {
// Best effort // Best effort
} }
} }
// Name-based kill — catches orphaned workers
killAllBackendProcesses();
});
// Handle SIGINT/SIGTERM (Ctrl+C, system shutdown)
process.on('SIGINT', () => {
try { log('[SIGNAL] SIGINT received'); } catch (_e) {}
killBackend();
killAllBackendProcesses();
process.exit(0);
});
process.on('SIGTERM', () => {
try { log('[SIGNAL] SIGTERM received'); } catch (_e) {}
killBackend();
killAllBackendProcesses();
process.exit(0);
}); });
// ── IPC Handlers ─────────────────────────────────────────────────── // ── IPC Handlers ───────────────────────────────────────────────────

View File

@@ -1 +1 @@
{"detail":"Calculation timeout (5 min) — try smaller radius or lower resolution"} {"detail":"Calculation timeout (5 min). Cleaned up 6 workers."}

File diff suppressed because one or more lines are too long