Skip to content

Commit bf8c8a8

Browse files
Direct parsing for Nvidia devices from NVML (#297)
Add NVML-based GPU monitor with runtime dlopen loading (#242). This implements unified NVIDIA monitoring, preferring NVML but supporting nvidia-smi as a fallback. The *stable* headers needed for NVML monitoring are copied from `nvml.h`, so as to avoid any build-time dependency on CUDA. At runtime, a `dlopen` call is made to check whether the NVML methods can be used. --------- Co-authored-by: Graeme Andrew Stewart <graeme.andrew.stewart@desy.de>
1 parent 06850cf commit bf8c8a8

6 files changed

Lines changed: 488 additions & 13 deletions

File tree

CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ cmake_minimum_required(VERSION 3.10)
66
# From CMake 3, we can set the project version easily in one go
77
project(prmon VERSION 3.2.0 LANGUAGES CXX)
88

9+
# For newer CMake versions (3.13+), allow target_link_libraries() to be called on targets defined in other directories
10+
if(POLICY CMP0079)
11+
cmake_policy(SET CMP0079 NEW)
12+
endif()
13+
914
# For newer CMakes use normalized install destination paths
1015
if(POLICY CMP0177)
1116
cmake_policy(SET CMP0177 NEW)
@@ -53,7 +58,7 @@ set(BUILD_GTESTS "${BUILD_GTESTS}"
5358

5459
# Define a default build type when using a single-configuration generator like Make or Ninja
5560
# We make this default to Release, unless profiling is enabled, in which
56-
# case do RelWithDebInfo (bcause you need the debug symbols)
61+
# case do RelWithDebInfo (because you need the debug symbols)
5762
if(NOT CMAKE_BUILD_TYPE)
5863
if(PROFILE_GPROF OR PROFILE_GPERFTOOLS)
5964
set(CMAKE_BUILD_TYPE RelWithDebInfo)
@@ -85,7 +90,6 @@ if("${ARM_PROCESSOR}" STREQUAL "arm")
8590
message(STATUS "Disabled ABI warnings on ${CMAKE_HOST_SYSTEM_PROCESSOR}")
8691
endif()
8792

88-
8993
# Add package utilities to CMake path
9094
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")
9195

@@ -122,6 +126,7 @@ target_include_directories(prmon PRIVATE ${PROJECT_SOURCE_DIR}/package/include)
122126
if (BUILD_BENCHMARK_LOG)
123127
target_include_directories(benchmark-log PRIVATE ${PROJECT_SOURCE_DIR}/package/include)
124128
endif(BUILD_BENCHMARK_LOG)
129+
125130
#--- create uninstall target ---------------------------------------------------
126131
include(cmake/prmonUninstall.cmake)
127132

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,17 @@ might not contain all metrics that the default approach supports, e.g.,
163163
If any of these issues are encountered, a relevant message is printed
164164
to notify the user.
165165

166+
### NVIDIA GPU Monitoring
167+
168+
When NVIDIA GPUs are present, `prmon` will preferentially use the
169+
NVIDIA Management Library (NVML) for direct GPU monitoring via
170+
`libnvidia-ml.so`. If NVML is not available or fails to initialise,
171+
`prmon` falls back to parsing the output of `nvidia-smi`.
172+
No extra build-time dependencies are needed: NVML symbols are loaded
173+
at runtime via `dlopen`, so `prmon` can be compiled on machines without
174+
NVIDIA drivers and will detect GPU support when run on a machine that
175+
has them.
176+
166177
### Environment Variables
167178

168179
The `PRMON_DISABLE_MONITOR` environment variable can be used to specify a comma

package/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,18 @@ add_executable(prmon src/prmon.cpp
44
src/MessageBase.cpp
55
src/parameter.cpp
66
src/netmon.cpp
7+
src/nvidiamon.cpp
78
src/iomon.cpp
89
src/cpumon.cpp
910
src/countmon.cpp
1011
src/wallmon.cpp
1112
src/memmon.cpp
12-
src/nvidiamon.cpp
1313
)
1414

1515
target_link_libraries(prmon PRIVATE
1616
nlohmann_json::nlohmann_json
1717
spdlog::spdlog_header_only
18+
dl
1819
)
1920

2021
if (BUILD_BENCHMARK_LOG)

0 commit comments

Comments
 (0)