Skip to content

Commit 35d0485

Browse files
[rocm-libraries] ROCm/rocm-libraries#6462 (commit d41d078)
[hiptensor] Several fixes and improvements for tests on Windows (#6462) ## Motivation Improve the reliability of the tests and fix Windows-specific errors. ## Technical Details - Fix cleanup in yaml_test - Add an extra datatype to the plan cache tests so the test runs on all ASICs - Fix error when providing stride data in reduction_test - Copy relevant amd*.dll to the hiptensor/bin path so they have priority over the system DLLs - Force HIP runtime initialization before any test so that failures surface as a clean error ## Test Plan Run the full set of tests on Windows and on Linux. ## Test Result - Tests are passing. ## Submission Checklist - [x] Look over the contributing guidelines at https://github.com/ROCm/ROCm/blob/develop/CONTRIBUTING.md#pull-requests.
1 parent 264f6f2 commit 35d0485

7 files changed

Lines changed: 107 additions & 10 deletions

File tree

test/00_unit/yaml_test.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include <algorithm>
2828
#include <cstdio>
29+
#include <cstdlib>
2930
#include <fstream>
3031
#include <iterator>
3132
#include <numeric>
@@ -138,10 +139,19 @@ int main(int argc, char* argv[])
138139
hiptensor::YamlConfigLoader<hiptensor::ContractionTestParams>::storeToFile(tmpFile, yee);
139140
auto yee1
140141
= hiptensor::YamlConfigLoader<hiptensor::ContractionTestParams>::loadFromFile(tmpFile);
141-
if(!yee1)
142-
{
143-
return -1;
144-
}
145142

146-
return 0;
143+
int result = yee1 ? 0 : -1;
144+
145+
// Remove temp file explicitly before quick_exit() bypasses the destructor.
146+
::remove(tmpFile.c_str());
147+
148+
// Flush LLVM's buffered streams and destroy ManagedStatic objects while all
149+
// libraries are still loaded, before post-main cleanup runs.
150+
hiptensor::llvmShutdown();
151+
152+
// Use quick_exit() to bypass C++ static destructors and atexit() handlers.
153+
// Post-main cleanup in linked DLLs (hiptensor, HIP runtime) crashes under
154+
// ctest due to DLL unload order differences vs a direct run. Our cleanup is
155+
// already complete via llvmShutdown() above.
156+
std::quick_exit(result);
147157
}

test/01_contraction/configs/plan_cache_test_params.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
Log Level: [ HIPTENSOR_LOG_LEVEL_ERROR, HIPTENSOR_LOG_LEVEL_PERF_TRACE ]
33
Tensor Data Types:
44
- [ HIPTENSOR_R_32F, HIPTENSOR_R_32F, HIPTENSOR_R_32F, HIPTENSOR_R_32F, HIPTENSOR_R_32F ]
5+
- [ HIPTENSOR_R_16F, HIPTENSOR_R_16F, HIPTENSOR_R_16F, HIPTENSOR_R_16F, HIPTENSOR_R_16F ]
56
Algorithm Types:
67
- HIPTENSOR_ALGO_DEFAULT
78
Operators:

test/03_reduction/reduction_test.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ namespace hiptensor
404404
&descC,
405405
nmodeC,
406406
extentCD.data(),
407-
stridesCD.data() ? nullptr : stridesCD.data(),
407+
stridesCD.empty() ? nullptr : stridesCD.data(),
408408
acDataType,
409409
0));
410410

@@ -414,7 +414,7 @@ namespace hiptensor
414414
&descD,
415415
nmodeD,
416416
extentCD.data(),
417-
stridesCD.data() ? nullptr : stridesCD.data(),
417+
stridesCD.empty() ? nullptr : stridesCD.data(),
418418
acDataType,
419419
0));
420420

test/CMakeLists.txt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,44 @@ file(WRITE "${INSTALL_TEST_FILE}"
6363
# Target that will trigger build of all tests
6464
add_custom_target(hiptensor_tests)
6565

66+
# On Windows, DLL search order is: application dir -> System32 -> PATH.
67+
# A system-installed ROCm places amdhip64_7.dll and amd_comgr_3.dll in
68+
# System32, which always wins over PATH. If the test binary is compiled with
69+
# a newer toolchain but an older runtime is loaded from System32, kernel launches
70+
# may crash (SEH 0xC0000005) if the code-object format is incompatible.
71+
# Staging the correct runtime DLLs into CMAKE_RUNTIME_OUTPUT_DIRECTORY (the application
72+
# directory) ensures Windows finds them before System32.
73+
# This runs at CMake configure time so it takes effect on every cmake invocation.
# Because file(COPY) executes while configuring, the staged copy is refreshed only
# when CMake re-runs, not on an ordinary rebuild.
# NOTE(review): the glob below matches only amdhip64_7.dll, although amd_comgr_3.dll
# is named above as a System32 conflict as well - confirm whether it should be staged too.
74+
if(WIN32)
75+
# hip_DIR is set by find_package(HIP) to <HIP_ROOT>/lib/cmake/hip.
76+
# Walk up three directory levels to reach <HIP_ROOT>, then append /bin.
77+
get_filename_component(_hip_cmake_dir "${hip_DIR}" DIRECTORY) # .../lib/cmake
78+
get_filename_component(_hip_lib_dir "${_hip_cmake_dir}" DIRECTORY) # .../lib
79+
get_filename_component(_hip_root "${_hip_lib_dir}" DIRECTORY) # <HIP_ROOT>
80+
set(_hip_bin_dir "${_hip_root}/bin")
81+
82+
# Report source and destination so a wrong path is visible in the configure log.
message(STATUS "HIP runtime DLL source: ${_hip_bin_dir}")
83+
message(STATUS "HIP runtime DLL destination: ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
84+
85+
if(EXISTS "${_hip_bin_dir}")
86+
# NOTE(review): assumes CMAKE_RUNTIME_OUTPUT_DIRECTORY is already set to a non-empty
# path when this file is processed - confirm against the top-level CMakeLists.
file(MAKE_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
87+
# Only this exact file name is searched for; the result is empty when it is absent.
file(GLOB _hip_dlls "${_hip_bin_dir}/amdhip64_7.dll")
88+
if(_hip_dlls)
89+
foreach(_dll ${_hip_dlls})
90+
# NOTE(review): _dll_name is computed but not read anywhere in this block.
get_filename_component(_dll_name "${_dll}" NAME)
91+
file(COPY "${_dll}" DESTINATION "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
92+
endforeach()
93+
list(LENGTH _hip_dlls _hip_dll_count)
94+
message(STATUS "Staged ${_hip_dll_count} HIP runtime DLL(s) into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
95+
else()
96+
# Reached when the bin directory exists but amdhip64_7.dll was not found in it.
message(WARNING "No DLLs found in ${_hip_bin_dir} - tests may load wrong HIP runtime from System32")
97+
endif()
98+
else()
99+
# Reached when the bin directory derived from hip_DIR does not exist at all.
message(WARNING "HIP DLL source directory not found: ${_hip_bin_dir} - "
100+
"tests may load wrong HIP runtime from System32")
101+
endif()
102+
endif()
103+
66104
set(HIPTENSOR_COMMON_TEST_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
67105
${CMAKE_CURRENT_SOURCE_DIR}/hip_resource.cpp
68106
${CMAKE_CURRENT_SOURCE_DIR}/hiptensor_gtest_main.cpp)
@@ -141,6 +179,14 @@ function(add_hiptensor_test BINARY_NAME YAML_CONFIG_FILE TEST_SOURCES)
141179
# Set working directory to bin/ for proper DLL/resource loading on Windows
142180
set_property(TEST ${BINARY_NAME} PROPERTY WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
143181
182+
# On Windows, ensure the ROCm/HIP DLLs (amdhip64_7.dll, etc.) are discoverable by ctest.
183+
# ctest may not inherit the user's PATH that includes the ROCm bin directory, causing
184+
# HIP runtime DLL load failures (SEH 0xC0000005) on the first HIP call in a test fixture.
# The test's PATH is set to <install prefix>/bin, then the runtime output directory,
# then the PATH of the environment CMake was configured in ($ENV{PATH} is expanded
# at configure time, not when ctest runs).
# NOTE(review): on Windows $ENV{PATH} normally contains ';', which is also the separator
# between ENVIRONMENT entries - confirm the inherited PATH is not split or truncated
# (escaping it, or ENVIRONMENT_MODIFICATION with path_list_prepend on CMake >= 3.22,
# would avoid this).
# NOTE(review): set_property without APPEND replaces any ENVIRONMENT value previously
# set on this test - confirm none is set earlier in this function.
185+
if(WIN32)
186+
set_property(TEST ${BINARY_NAME} PROPERTY ENVIRONMENT
187+
"PATH=${CMAKE_INSTALL_PREFIX}/bin$<SEMICOLON>${CMAKE_RUNTIME_OUTPUT_DIRECTORY}$<SEMICOLON>$ENV{PATH}")
188+
endif()
189+
144190
# Install with rocm pkg
145191
rocm_install_targets(
146192
TARGETS ${BINARY_NAME}
@@ -203,6 +249,14 @@ function(add_hiptensor_unit_test)
203249
# Set working directory to bin/ for proper DLL/resource loading on Windows
204250
set_property(TEST ${BINARY_NAME} PROPERTY WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
205251

252+
# On Windows, ensure the ROCm/HIP DLLs (amdhip64_7.dll, etc.) are discoverable by ctest.
253+
# ctest may not inherit the user's PATH that includes the ROCm bin directory, causing
254+
# HIP runtime DLL load failures (SEH 0xC0000005) on the first HIP call in a test fixture.
# The unit test's PATH is set to <install prefix>/bin, then the runtime output directory,
# then the PATH of the environment CMake was configured in ($ENV{PATH} is expanded
# at configure time, not when ctest runs).
# NOTE(review): on Windows $ENV{PATH} normally contains ';', which is also the separator
# between ENVIRONMENT entries - confirm the inherited PATH is not split or truncated
# (escaping it, or ENVIRONMENT_MODIFICATION with path_list_prepend on CMake >= 3.22,
# would avoid this).
# NOTE(review): set_property without APPEND replaces any ENVIRONMENT value previously
# set on this test - confirm none is set earlier in this function.
255+
if(WIN32)
256+
set_property(TEST ${BINARY_NAME} PROPERTY ENVIRONMENT
257+
"PATH=${CMAKE_INSTALL_PREFIX}/bin$<SEMICOLON>${CMAKE_RUNTIME_OUTPUT_DIRECTORY}$<SEMICOLON>$ENV{PATH}")
258+
endif()
259+
206260
# Install with rocm pkg
207261
rocm_install_targets(
208262
TARGETS ${BINARY_NAME}

test/hiptensor_gtest_main.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
#include "hiptensor_options.hpp"
2727
#include "utils.hpp"
2828

29+
#include <cstdio>
2930
#include <gtest/gtest.h>
31+
#include <hip/hip_runtime_api.h>
3032

3133
#include "llvm/command_line_parser.hpp"
3234

@@ -35,11 +37,26 @@ int main(int argc, char** argv)
3537
// Parse hiptensor test options
3638
hiptensor::parseOptions(argc, argv);
3739

40+
// Force HIP runtime initialization before any test fixture runs.
41+
// If HIP fails here we get a clean error rather than an SEH crash inside a
42+
// fixture, which would leave the C++ static-initialization guard locked and
43+
// cause all subsequent tests to deadlock ("resource deadlock would occur").
44+
int deviceCount = 0;
45+
hipError_t hipErr = hipGetDeviceCount(&deviceCount);
46+
if((hipErr != hipSuccess) || (deviceCount <= 0))
47+
{
48+
fprintf(
49+
stderr,
50+
"hipGetDeviceCount failed (%d: %s) — Device count: %d — aborting before tests run.\n",
51+
static_cast<int>(hipErr),
52+
hipGetErrorString(hipErr),
53+
deviceCount);
54+
return 1;
55+
}
56+
3857
// Initialize Google Tests
3958
testing::InitGoogleTest(&argc, argv);
4059

4160
// Run the tests
42-
int status = RUN_ALL_TESTS();
43-
44-
return status;
61+
return RUN_ALL_TESTS();
4562
}

test/llvm/yaml_parser.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,10 @@ namespace hiptensor
3939
static std::optional<ConfigT> loadFromString(std::string const& yaml = "");
4040
static void storeToFile(std::string const& filePath, ConfigT const& config);
4141
};
42+
43+
// Flush LLVM's buffered output streams and shut down LLVM's ManagedStatic
44+
// objects. Must be called before returning from main() in any binary that
45+
// uses LLVM YAML I/O, to prevent use-after-free at process exit when the
46+
// DLL unload order differs from a direct run (e.g. under ctest on Windows).
47+
void llvmShutdown();
4248
}

test/llvm/yaml_parser_config.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#include <vector>
2828

2929
#include <llvm/ObjectYAML/YAML.h>
30+
#include <llvm/Support/ManagedStatic.h>
31+
#include <llvm/Support/raw_ostream.h>
3032

3133
#include <hiptensor/hiptensor.h>
3234

@@ -543,4 +545,11 @@ namespace hiptensor
543545
template struct YamlConfigLoader<ContractionTestParams>;
544546
template struct YamlConfigLoader<PermutationTestParams>;
545547
template struct YamlConfigLoader<ReductionTestParams>;
548+
549+
// Flushes LLVM's standard output and error streams, then tears down LLVM's
// ManagedStatic objects via llvm::llvm_shutdown(). Takes no arguments and
// returns nothing.
// NOTE(review): LLVM facilities backed by ManagedStatic should presumably not be
// used after this call - confirm that callers invoke it last.
void llvmShutdown()
550+
{
551+
// Flush before shutdown so pending output is written while the streams are still valid.
llvm::outs().flush();
552+
llvm::errs().flush();
553+
// Destroys all ManagedStatic variables registered with LLVM.
llvm::llvm_shutdown();
554+
}
546555
}

0 commit comments

Comments
 (0)