forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtestGPUsortHIP.hip.cxx
More file actions
154 lines (127 loc) · 5.05 KB
/
testGPUsortHIP.hip.cxx
File metadata and controls
154 lines (127 loc) · 5.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
/// \file testGPUsortHIP.hip.cxx
/// \author Michael Lettrich
#define GPUCA_GPUTYPE_VEGA
#define BOOST_TEST_MODULE Test GPUCommonAlgorithm Sorting HIP
#define BOOST_TEST_MAIN
#define BOOST_TEST_DYN_LINK
#include <iostream>
#include <cstring>
#include <hip/hip_runtime.h>
#include <boost/test/unit_test.hpp>
#include "GPUCommonAlgorithm.h"
///////////////////////////////////////////////////////////////
// Test setup and tear down
///////////////////////////////////////////////////////////////
static constexpr float TOLERANCE = 10 * std::numeric_limits<float>::epsilon();
hipError_t hipCheckError(hipError_t hipErrorCode)
{
if (hipErrorCode != hipSuccess) {
std::cerr << "ErrorCode " << hipErrorCode << " " << hipGetErrorName(hipErrorCode) << ": " << hipGetErrorString(hipErrorCode) << std::endl;
}
return hipErrorCode;
}
void hipCheckErrorFatal(hipError_t hipErrorCode)
{
if (hipCheckError(hipErrorCode) != hipSuccess) {
exit(-1);
}
}
struct TestEnvironment {
TestEnvironment() : size(101), data(nullptr), sorted(size)
{
hipCheckErrorFatal(hipHostMalloc(&data, size * sizeof(float), hipHostRegisterDefault));
// create an array of unordered floats with negative and positive values
for (size_t i = 0; i < size; i++) {
data[i] = size / 2.0f - i;
}
// create copy
std::memcpy(sorted.data(), data, size * sizeof(float));
// sort
std::sort(sorted.begin(), sorted.end());
}
~TestEnvironment() // NOLINT: clang-tidy doesn't understand hip macro magic, and thinks this is trivial
{
hipCheckErrorFatal(hipFree(data));
};
const size_t size;
float* data;
std::vector<float> sorted;
};
template <typename T>
void testAlmostEqualArray(T* correct, T* testing, size_t size)
{
for (size_t i = 0; i < size; i++) {
if (std::fabs(correct[i]) < TOLERANCE) {
BOOST_CHECK_SMALL(testing[i], TOLERANCE);
} else {
BOOST_CHECK_CLOSE(correct[i], testing[i], TOLERANCE);
}
}
}
///////////////////////////////////////////////////////////////
__global__ void sortInThread(float* data, size_t dataLength)
{
// make sure only one thread is working on this.
if (hipBlockIdx_x == 0 && hipBlockIdx_y == 0 && hipBlockIdx_z == 0 && hipThreadIdx_x == 0 && hipThreadIdx_y == 0 && hipThreadIdx_z == 0) {
o2::gpu::CAAlgo::sort(data, data + dataLength);
}
}
__global__ void sortInThreadWithOperator(float* data, size_t dataLength)
{
// make sure only one thread is working on this.
if (hipBlockIdx_x == 0 && hipBlockIdx_y == 0 && hipBlockIdx_z == 0 && hipThreadIdx_x == 0 && hipThreadIdx_y == 0 && hipThreadIdx_z == 0) {
o2::gpu::CAAlgo::sort(data, data + dataLength, [](float a, float b) { return a < b; });
}
}
///////////////////////////////////////////////////////////////
__global__ void sortInBlock(float* data, size_t dataLength)
{
o2::gpu::CAAlgo::sortInBlock<float>(data, data + dataLength);
}
__global__ void sortInBlockWithOperator(float* data, size_t dataLength)
{
o2::gpu::CAAlgo::sortInBlock(data, data + dataLength, [](float a, float b) { return a < b; });
}
///////////////////////////////////////////////////////////////
BOOST_AUTO_TEST_SUITE(TestsortInThread)
BOOST_FIXTURE_TEST_CASE(GPUsortThreadHIP, TestEnvironment)
{
hipLaunchKernelGGL(sortInThread, dim3(1), dim3(1), 0, 0, data, size);
// sortInThread<<<dim3(1), dim3(1), 0, 0>>>(data, size);
BOOST_CHECK_EQUAL(hipCheckError(hipDeviceSynchronize()), hipSuccess);
testAlmostEqualArray(sorted.data(), data, size);
}
BOOST_FIXTURE_TEST_CASE(GPUsortThreadOperatorHIP, TestEnvironment)
{
hipLaunchKernelGGL(sortInThreadWithOperator, dim3(1), dim3(1), 0, 0, data, size);
// sortInThreadWithOperator<<<dim3(1), dim3(1), 0, 0>>>(data, size);
BOOST_CHECK_EQUAL(hipCheckError(hipDeviceSynchronize()), hipSuccess);
testAlmostEqualArray(sorted.data(), data, size);
}
BOOST_AUTO_TEST_SUITE_END()
BOOST_AUTO_TEST_SUITE(TestsortInBlock)
BOOST_FIXTURE_TEST_CASE(GPUsortBlockHIP, TestEnvironment)
{
hipLaunchKernelGGL(sortInBlock, dim3(1), dim3(128), 0, 0, data, size);
// sortInBlock<<<dim3(1), dim3(128), 0, 0>>>(data, size);
BOOST_CHECK_EQUAL(hipCheckError(hipDeviceSynchronize()), hipSuccess);
testAlmostEqualArray(sorted.data(), data, size);
}
BOOST_FIXTURE_TEST_CASE(GPUsortBlockOperatorHIP, TestEnvironment)
{
hipLaunchKernelGGL(sortInBlockWithOperator, dim3(1), dim3(128), 0, 0, data, size);
// sortInBlockWithOperator<<<dim3(1), dim3(128), 0, 0>>>(data, size);
BOOST_CHECK_EQUAL(hipCheckError(hipDeviceSynchronize()), hipSuccess);
testAlmostEqualArray(sorted.data(), data, size);
}
BOOST_AUTO_TEST_SUITE_END()