Files
rfcp/backend/app/services/cache.py
mytec defa3ad440 @mytec: feat: Phase 3.0 Architecture Refactor
Major refactoring of RFCP backend:
- Modular propagation models (8 models)
- SharedMemoryManager for terrain data
- ProcessPoolExecutor parallel processing
- WebSocket progress streaming
- Building filtering pipeline (351k → 15k)
- 82 unit tests

Performance: Standard preset 38s → 5s (7.6x speedup)

Known issue: Detailed preset timeout (fix in 3.1.0)
2026-02-01 23:12:26 +02:00

251 lines
7.5 KiB
Python

"""
Unified cache management for RFCP services.
Provides a single interface for managing all cached data:
- Terrain tiles (SRTM .hgt files, in-memory NumPy arrays)
- OSM building data (disk JSON + in-memory)
- Spatial index data
Tracks memory usage and enforces limits to prevent
memory explosion during large-area calculations.
"""
import os
import sys
import json
import time
import threading
from pathlib import Path
from typing import Optional, Dict, Any, Callable
from datetime import datetime, timedelta
class CacheEntry:
    """Single cache entry: value plus bookkeeping metadata."""
    __slots__ = ('value', 'created_at', 'last_accessed', 'size_bytes', 'hits')

    def __init__(self, value: Any, size_bytes: int = 0):
        self.value = value
        # Monotonic clock: immune to wall-clock adjustments.
        self.created_at = time.monotonic()
        self.last_accessed = self.created_at
        self.size_bytes = size_bytes
        self.hits = 0

    def touch(self):
        """Record a cache hit on this entry."""
        self.last_accessed = time.monotonic()
        self.hits += 1


class MemoryCache:
    """
    In-memory LRU cache with byte-level tracking.

    Thread-safe. Evicts least-recently-used entries whenever either
    max_entries or max_size_bytes would be exceeded.

    Recency is tracked via dict insertion order (guaranteed since
    Python 3.7): a hit re-inserts the entry at the end, so the front
    of the dict is always the LRU entry and eviction is O(1) instead
    of an O(n) scan over last_accessed timestamps.
    """

    def __init__(self, name: str, max_entries: int = 100, max_size_bytes: int = 500 * 1024 * 1024):
        self.name = name
        self.max_entries = max_entries
        self.max_size_bytes = max_size_bytes
        self._entries: Dict[str, CacheEntry] = {}
        self._lock = threading.Lock()
        self._total_bytes = 0
        self._total_hits = 0
        self._total_misses = 0

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for key, or None on a miss."""
        with self._lock:
            entry = self._entries.pop(key, None)
            if entry is None:
                self._total_misses += 1
                return None
            # Re-insert at the end so dict order tracks recency.
            self._entries[key] = entry
            entry.touch()
            self._total_hits += 1
            return entry.value

    def put(self, key: str, value: Any, size_bytes: int = 0):
        """Insert or replace key, evicting LRU entries to stay within limits."""
        with self._lock:
            # Remove an existing entry first so its bytes don't count twice.
            old = self._entries.pop(key, None)
            if old is not None:
                self._total_bytes -= old.size_bytes
            # Evict until both the entry-count and byte limits hold.
            # (If size_bytes alone exceeds max_size_bytes, the cache is
            # emptied and the oversized entry is still stored.)
            while (
                len(self._entries) >= self.max_entries
                or (self._total_bytes + size_bytes > self.max_size_bytes and self._entries)
            ):
                self._evict_lru()
            self._entries[key] = CacheEntry(value, size_bytes)
            self._total_bytes += size_bytes

    def remove(self, key: str) -> bool:
        """Remove key; return True if it was present."""
        with self._lock:
            entry = self._entries.pop(key, None)
            if entry is None:
                return False
            self._total_bytes -= entry.size_bytes
            return True

    def clear(self):
        """Drop all entries (hit/miss counters are preserved)."""
        with self._lock:
            self._entries.clear()
            self._total_bytes = 0

    def _evict_lru(self):
        """Remove the least-recently-used entry. Caller must hold _lock."""
        if not self._entries:
            return
        # Front of the insertion-ordered dict == least recently used.
        lru_key = next(iter(self._entries))
        entry = self._entries.pop(lru_key)
        self._total_bytes -= entry.size_bytes

    @property
    def size(self) -> int:
        """Number of cached entries."""
        return len(self._entries)

    @property
    def size_bytes(self) -> int:
        """Total tracked size of cached values, in bytes."""
        return self._total_bytes

    @property
    def size_mb(self) -> float:
        """Total tracked size, in MiB."""
        return self._total_bytes / (1024 * 1024)

    def stats(self) -> dict:
        """Return a consistent snapshot of cache statistics."""
        # Hold the lock so entries/bytes/hit counters are mutually consistent
        # (the original read them unlocked, racing with put/remove/clear).
        with self._lock:
            total = self._total_hits + self._total_misses
            return {
                "name": self.name,
                "entries": len(self._entries),
                "size_mb": round(self._total_bytes / (1024 * 1024), 1),
                "max_size_mb": round(self.max_size_bytes / (1024 * 1024), 1),
                "hits": self._total_hits,
                "misses": self._total_misses,
                "hit_rate": round(self._total_hits / total * 100, 1) if total > 0 else 0,
            }
class DiskCache:
    """
    Persistent disk cache with TTL expiry.

    Each entry is stored as a JSON file {'_ts': iso-timestamp, 'v': value}
    under <base_path>/cache/<name>/. Used for OSM building data and other
    HTTP responses.
    """

    def __init__(self, name: str, base_path: Optional[Path] = None, ttl_days: int = 30):
        self.name = name
        self.ttl_days = ttl_days
        if base_path is None:
            base_path = Path(os.environ.get('RFCP_DATA_PATH', './data'))
        self.cache_path = base_path / 'cache' / name
        self.cache_path.mkdir(parents=True, exist_ok=True)

    def _key_to_file(self, key: str) -> Path:
        # Sanitize key for filesystem use (path separators, drive colons).
        safe = key.replace('/', '_').replace('\\', '_').replace(':', '_')
        return self.cache_path / f"{safe}.json"

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value, or None if absent, expired, or unreadable."""
        path = self._key_to_file(key)
        # EAFP: read directly instead of exists()-then-read (TOCTOU race).
        # Explicit utf-8 so reads don't depend on the locale encoding.
        try:
            data = json.loads(path.read_text(encoding='utf-8'))
        except FileNotFoundError:
            return None
        except (OSError, json.JSONDecodeError):
            return None  # unreadable/corrupt file -> treat as a miss
        try:
            cached_at = datetime.fromisoformat(data.get('_ts', '2000-01-01'))
        except ValueError:
            return None  # corrupt timestamp -> treat as a miss
        if datetime.now() - cached_at > timedelta(days=self.ttl_days):
            # Expired: drop the stale file so it isn't re-checked.
            path.unlink(missing_ok=True)
            return None
        return data.get('v')

    def put(self, key: str, value: Any):
        """Persist value under key; write errors are logged, not raised (best-effort)."""
        path = self._key_to_file(key)
        try:
            path.write_text(json.dumps({
                '_ts': datetime.now().isoformat(),
                'v': value,
            }), encoding='utf-8')
        except OSError as e:
            print(f"[DiskCache:{self.name}] Write error: {e}")

    def remove(self, key: str) -> bool:
        """Delete the entry; return True if a file was actually removed."""
        # EAFP: unlink directly, avoiding the exists()/unlink() race.
        try:
            self._key_to_file(key).unlink()
            return True
        except FileNotFoundError:
            return False

    def clear(self):
        """Delete every cached file for this cache."""
        for f in self.cache_path.glob("*.json"):
            f.unlink(missing_ok=True)

    def size_mb(self) -> float:
        """Total on-disk size of cached files, in MiB."""
        total = 0
        for f in self.cache_path.glob("*.json"):
            try:
                total += f.stat().st_size
            except OSError:
                pass  # file vanished between glob() and stat()
        return total / (1024 * 1024)

    def stats(self) -> dict:
        """Return file count, disk footprint, and TTL for this cache."""
        files = list(self.cache_path.glob("*.json"))
        return {
            "name": self.name,
            "entries": len(files),
            "size_mb": round(self.size_mb(), 1),
            "ttl_days": self.ttl_days,
        }
class CacheManager:
    """
    Unified cache manager for all RFCP services.

    Exposes:
    - terrain:   MemoryCache for SRTM tile arrays (~25MB each)
    - buildings: MemoryCache for building lists
    - spatial:   MemoryCache for spatial index objects
    - osm_disk:  DiskCache for OSM API responses
    """

    def __init__(self):
        mib = 1024 * 1024
        # ~500MB ceiling for terrain: roughly 20 tiles at ~25MB each.
        self.terrain = MemoryCache("terrain", max_entries=20, max_size_bytes=500 * mib)
        self.buildings = MemoryCache("buildings", max_entries=50, max_size_bytes=200 * mib)
        self.spatial = MemoryCache("spatial_index", max_entries=50, max_size_bytes=100 * mib)
        self.osm_disk = DiskCache("osm", ttl_days=30)

    def clear_all(self):
        """Empty every managed cache, in-memory and on-disk."""
        for cache in (self.terrain, self.buildings, self.spatial, self.osm_disk):
            cache.clear()

    def stats(self) -> dict:
        """Aggregate per-cache stats plus the total in-memory footprint (MB)."""
        memory_caches = (self.terrain, self.buildings, self.spatial)
        report = {
            "terrain": self.terrain.stats(),
            "buildings": self.buildings.stats(),
            "spatial": self.spatial.stats(),
            "osm_disk": self.osm_disk.stats(),
        }
        report["total_memory_mb"] = round(sum(c.size_mb for c in memory_caches), 1)
        return report


# Module-level singleton shared by all services.
cache_manager = CacheManager()