@mytec: iter3.5.1 ready for testing

This commit is contained in:
2026-02-03 12:04:36 +02:00
parent 255b91f257
commit 20d19d09ae
14 changed files with 1583 additions and 8 deletions

View File

@@ -33,3 +33,9 @@ async def gpu_set_device(request: SetDeviceRequest):
return {"status": "ok", **result}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
@router.get("/diagnostics")
async def gpu_diagnostics():
    """Return the GPU manager's diagnostic report.

    Meant for troubleshooting GPU detection issues; the response body is
    exactly what ``gpu_manager.get_diagnostics()`` returns.
    """
    report = gpu_manager.get_diagnostics()
    return report

View File

@@ -167,6 +167,66 @@ class GPUManager:
for d in self._devices
]
def get_diagnostics(self) -> dict:
    """Collect diagnostic info for troubleshooting GPU detection.

    Probes the optional CuPy (CUDA) and PyOpenCL backends and never lets a
    probe failure propagate: every problem is recorded as text inside the
    returned dict instead.

    Returns:
        A dict with these keys:

        * ``python_version``, ``platform``, ``numpy`` -- interpreter,
          OS and NumPy version information.
        * ``cuda`` -- CuPy version, CUDA runtime version, device count and
          one ``device_<i>`` entry per device (name, ``memory_mb``,
          ``compute_capability``); or an ``error`` string (plus
          ``install_hint`` when CuPy is simply absent).
        * ``opencl`` -- PyOpenCL version and a ``platforms`` list, each
          entry holding the platform name and its ``devices``; a platform
          whose device query fails carries its own ``error`` string. A
          failure outside a single platform is reported as a top-level
          ``error`` (plus ``install_hint`` when PyOpenCL is absent).
        * ``detected_devices`` -- number of devices this manager holds.
        * ``active_backend`` -- value of the currently active backend.
    """
    import sys
    import platform

    diag = {
        "python_version": sys.version,
        "platform": platform.platform(),
        "numpy": {"version": np.__version__},
        "cuda": {},
        "opencl": {},
        "detected_devices": len(self._devices),
        "active_backend": self._active_backend.value,
    }

    # --- CuPy / CUDA -----------------------------------------------------
    cuda = diag["cuda"]
    try:
        import cupy as cp

        cuda["cupy_version"] = cp.__version__
        cuda["cuda_runtime_version"] = cp.cuda.runtime.runtimeGetVersion()
        device_count = cp.cuda.runtime.getDeviceCount()
        cuda["device_count"] = device_count
        for i in range(device_count):
            props = cp.cuda.runtime.getDeviceProperties(i)
            name = props["name"]
            if isinstance(name, bytes):
                # Never let an odd byte in a device name abort the report.
                name = name.decode(errors="replace")
            cuda[f"device_{i}"] = {
                "name": str(name),
                "memory_mb": props["totalGlobalMem"] // (1024 * 1024),
                "compute_capability": f"{props['major']}.{props['minor']}",
            }
    except ImportError as e:
        # Only claim "not installed" when the cupy module itself is missing.
        # An installed-but-broken CuPy (e.g. shared libraries that fail to
        # load, or a missing sub-dependency) also raises ImportError, and
        # its message is the useful diagnostic, so keep it.
        if isinstance(e, ModuleNotFoundError) and e.name == "cupy":
            cuda["error"] = "CuPy not installed"
            cuda["install_hint"] = "pip install cupy-cuda12x"
        else:
            cuda["error"] = f"CuPy import failed: {e}"
    except Exception as e:
        cuda["error"] = str(e)

    # --- PyOpenCL --------------------------------------------------------
    opencl = diag["opencl"]
    try:
        import pyopencl as cl

        opencl["pyopencl_version"] = cl.VERSION_TEXT
        opencl["platforms"] = []
        for p in cl.get_platforms():
            platform_info = {"name": p.name.strip(), "devices": []}
            # Register the platform before querying its devices so that a
            # failing platform still shows up in the report.
            opencl["platforms"].append(platform_info)
            try:
                for d in p.get_devices():
                    platform_info["devices"].append({
                        "name": d.name.strip(),
                        "type": cl.device_type.to_string(d.type),
                        "memory_mb": d.global_mem_size // (1024 * 1024),
                        "compute_units": d.max_compute_units,
                    })
            except Exception as e:
                # Keep the failure local to this platform; the remaining
                # platforms are still enumerated.
                platform_info["error"] = str(e)
    except ImportError as e:
        # Same distinction as for CuPy: missing module vs. failed import.
        if isinstance(e, ModuleNotFoundError) and e.name == "pyopencl":
            opencl["error"] = "PyOpenCL not installed"
            opencl["install_hint"] = "pip install pyopencl"
        else:
            opencl["error"] = f"PyOpenCL import failed: {e}"
    except Exception as e:
        opencl["error"] = str(e)

    return diag
def set_device(self, backend: str, index: int = 0) -> dict:
"""Switch active compute device."""
target_backend = GPUBackend(backend)