@mytec: iter2.5 vectorization start

This commit is contained in:
2026-02-01 13:13:39 +02:00
parent 4026233b21
commit acc90fe538
8 changed files with 747 additions and 71 deletions

View File

@@ -23,12 +23,12 @@ Usage:
import os
import sys
import subprocess
import time
import threading
import multiprocessing as mp
from typing import List, Dict, Tuple, Any, Optional, Callable
import numpy as np
import psutil
# ── Cancellation token ──
@@ -49,42 +49,77 @@ class CancellationToken:
# ── Worker process cleanup ──
def _clog(msg: str):
"""Log with [PARALLEL] prefix."""
print(f"[PARALLEL] {msg}", flush=True)
def _kill_workers_windows(my_pid: int) -> int:
    """Force-kill every ``rfcp-server.exe`` except *my_pid*; return the kill count."""
    import csv  # local import: only needed on this code path

    killed_count = 0
    try:
        # List all rfcp-server.exe processes in CSV format, without a header row.
        listing = subprocess.run(
            ['tasklist', '/FI', 'IMAGENAME eq rfcp-server.exe', '/FO', 'CSV', '/NH'],
            capture_output=True, text=True, timeout=5,
        )
        # csv.reader copes with quoted fields that themselves contain commas
        # (e.g. the memory-usage column), unlike a plain split(',').
        for row in csv.reader(listing.stdout.splitlines()):
            # Also skips tasklist's single-field "INFO: No tasks ..." message.
            if len(row) < 2 or row[0].strip().lower() != 'rfcp-server.exe':
                continue
            try:
                pid = int(row[1].strip())
            except ValueError:
                continue
            if pid == my_pid:
                continue
            try:
                outcome = subprocess.run(
                    ['taskkill', '/F', '/PID', str(pid)],
                    capture_output=True, timeout=5,
                )
            except subprocess.TimeoutExpired:
                continue
            # Only count processes that taskkill reports as terminated.
            if outcome.returncode == 0:
                killed_count += 1
                _clog(f"Killed worker PID {pid}")
    except Exception as e:
        _clog(f"Kill workers error: {e}")
        # Fallback: kill by image name, but filter out our own PID and do not
        # use /T (tree kill) so the current process is never taken down.
        try:
            subprocess.run(
                ['taskkill', '/F', '/IM', 'rfcp-server.exe',
                 '/FI', f'PID ne {my_pid}'],
                capture_output=True, timeout=5,
            )
        except Exception as fallback_err:
            _clog(f"Kill workers fallback error: {fallback_err}")
    return killed_count


def _kill_workers_unix(my_pid: int) -> int:
    """SIGKILL every process matched by ``pgrep -f rfcp-server`` except *my_pid*."""
    import signal  # local import: only needed on this code path

    killed_count = 0
    try:
        # NOTE(review): `pgrep -f` matches the pattern against the full command
        # line, so any process whose arguments contain "rfcp-server" is
        # selected -- confirm this is acceptable on shared hosts.
        listing = subprocess.run(
            ['pgrep', '-f', 'rfcp-server'],
            capture_output=True, text=True, timeout=5,
        )
        for pid_str in listing.stdout.split():
            try:
                pid = int(pid_str)
            except ValueError:
                continue
            if pid == my_pid:
                continue
            try:
                os.kill(pid, signal.SIGKILL)
            except (ProcessLookupError, PermissionError):
                # Already gone, or not ours to kill: nothing to count.
                continue
            killed_count += 1
            _clog(f"Killed worker PID {pid}")
    except Exception as e:
        _clog(f"Kill workers error: {e}")
    return killed_count


def _kill_worker_processes() -> int:
    """Kill ALL rfcp-server processes except the current (main) process.

    Processes are located by NAME (``tasklist`` on Windows, ``pgrep -f``
    elsewhere) instead of by walking the PID tree; the original author's
    rationale is that psutil.children() does not see grandchildren spawned
    by ProcessPoolExecutor workers.

    This is best-effort cleanup: errors are logged via ``_clog`` and never
    propagated to the caller.

    Returns:
        The number of processes that were successfully force-killed.
    """
    my_pid = os.getpid()
    if sys.platform == 'win32':
        return _kill_workers_windows(my_pid)
    return _kill_workers_unix(my_pid)
# ── Try to import Ray ──
@@ -470,7 +505,9 @@ def _calculate_with_process_pool(
pool = None
try:
pool = ProcessPoolExecutor(max_workers=num_workers)
# Use spawn context for clean worker processes
ctx = mp.get_context('spawn')
pool = ProcessPoolExecutor(max_workers=num_workers, mp_context=ctx)
futures = {
pool.submit(
_pool_worker_process_chunk,
@@ -510,9 +547,12 @@ def _calculate_with_process_pool(
# CRITICAL: Always cleanup pool and orphaned workers
if pool:
pool.shutdown(wait=False, cancel_futures=True)
# Give pool time to cleanup gracefully
time.sleep(0.5)
# Then force kill any survivors by process name
killed = _kill_worker_processes()
if killed > 0:
log_fn(f"Killed {killed} orphaned worker processes")
log_fn(f"Force killed {killed} orphaned workers")
calc_time = time.time() - t_calc
log_fn(f"ProcessPool done: {calc_time:.1f}s, {len(all_results)} results "