@mytec: iter3.5.1 ready for testing
This commit is contained in:
@@ -33,3 +33,9 @@ async def gpu_set_device(request: SetDeviceRequest):
|
||||
return {"status": "ok", **result}
|
||||
except ValueError as e:
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/diagnostics")
async def gpu_diagnostics():
    """Return the GPU manager's diagnostic report.

    Intended for troubleshooting GPU detection issues; the payload is
    exactly what ``gpu_manager.get_diagnostics()`` produces.
    """
    report = gpu_manager.get_diagnostics()
    return report
|
||||
|
||||
@@ -167,6 +167,66 @@ class GPUManager:
|
||||
for d in self._devices
|
||||
]
|
||||
|
||||
def get_diagnostics(self) -> dict:
    """Collect a diagnostic report for troubleshooting GPU detection.

    Probes the optional CuPy (CUDA) and PyOpenCL packages and records what
    each one reports. A failing probe never propagates: the failure is
    written into that backend's section instead.

    Returns:
        A dict with the keys ``python_version``, ``platform``, ``numpy``,
        ``cuda``, ``opencl``, ``detected_devices`` and ``active_backend``.

        ``cuda`` holds ``cupy_version``, ``cuda_runtime_version``,
        ``device_count`` and one ``device_<i>`` entry per device on success.
        ``opencl`` holds ``pyopencl_version`` and a ``platforms`` list on
        success. On failure either section carries ``error``; when the
        package could not be imported it also carries ``install_hint`` and
        ``detail`` (the text of the import error).
    """
    import platform
    import sys

    diag = {
        "python_version": sys.version,
        "platform": platform.platform(),
        "numpy": {"version": np.__version__},
        "cuda": {},
        "opencl": {},
        "detected_devices": len(self._devices),
        "active_backend": self._active_backend.value,
    }

    # Check CuPy/CUDA
    cuda = diag["cuda"]
    try:
        import cupy as cp

        runtime = cp.cuda.runtime
        cuda["cupy_version"] = cp.__version__
        cuda["cuda_runtime_version"] = runtime.runtimeGetVersion()
        cuda["device_count"] = runtime.getDeviceCount()
        for i in range(cuda["device_count"]):
            props = runtime.getDeviceProperties(i)
            name = props["name"]
            if isinstance(name, bytes):
                # A device name that is not valid UTF-8 must not abort the
                # enumeration of the remaining devices.
                name = name.decode(errors="replace")
            cuda[f"device_{i}"] = {
                "name": str(name),
                "memory_mb": props["totalGlobalMem"] // (1024 * 1024),
                "compute_capability": f"{props['major']}.{props['minor']}",
            }
    except ImportError as e:
        cuda["error"] = "CuPy not installed"
        cuda["install_hint"] = "pip install cupy-cuda12x"
        # ImportError also covers a package that is present but fails to
        # load, so keep the underlying message for the person debugging.
        cuda["detail"] = str(e)
    except Exception as e:
        cuda["error"] = str(e)

    # Check PyOpenCL
    opencl = diag["opencl"]
    try:
        import pyopencl as cl

        opencl["pyopencl_version"] = cl.VERSION_TEXT
        opencl["platforms"] = []
        for p in cl.get_platforms():
            platform_info = {"name": p.name.strip(), "devices": []}
            for d in p.get_devices():
                platform_info["devices"].append({
                    "name": d.name.strip(),
                    "type": cl.device_type.to_string(d.type),
                    "memory_mb": d.global_mem_size // (1024 * 1024),
                    "compute_units": d.max_compute_units,
                })
            opencl["platforms"].append(platform_info)
    except ImportError as e:
        opencl["error"] = "PyOpenCL not installed"
        opencl["install_hint"] = "pip install pyopencl"
        # Same reasoning as the CuPy branch: surface why the import failed.
        opencl["detail"] = str(e)
    except Exception as e:
        opencl["error"] = str(e)

    return diag
|
||||
|
||||
def set_device(self, backend: str, index: int = 0) -> dict:
|
||||
"""Switch active compute device."""
|
||||
target_backend = GPUBackend(backend)
|
||||
|
||||
Reference in New Issue
Block a user