Skip to content

Commit 5b36617

Browse files
committed
Wrap NVML calls inside try/except.
1 parent b65d227 commit 5b36617

1 file changed

Lines changed: 16 additions & 11 deletions

File tree

src/somd2/runner/_repex.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -516,18 +516,23 @@ def _check_device_memory(index):
516516
index: int
517517
The index of the CUDA device.
518518
"""
519-
from pynvml import (
520-
nvmlInit,
521-
nvmlShutdown,
522-
nvmlDeviceGetHandleByIndex,
523-
nvmlDeviceGetMemoryInfo,
524-
)
519+
try:
520+
from pynvml import (
521+
nvmlInit,
522+
nvmlShutdown,
523+
nvmlDeviceGetHandleByIndex,
524+
nvmlDeviceGetMemoryInfo,
525+
)
526+
527+
nvmlInit()
528+
handle = nvmlDeviceGetHandleByIndex(index)
529+
info = nvmlDeviceGetMemoryInfo(handle)
530+
result = (info.used, info.free, info.total)
531+
nvmlShutdown()
532+
except Exception as e:
533+
msg = f"Could not determine memory usage for device {index}: {e}"
534+
_logger.error(msg)
525535

526-
nvmlInit()
527-
handle = nvmlDeviceGetHandleByIndex(index)
528-
info = nvmlDeviceGetMemoryInfo(handle)
529-
result = (info.used, info.free, info.total)
530-
nvmlShutdown()
531536
return result
532537

533538

0 commit comments

Comments
 (0)