Skip to content

Commit 7b64bfc

Browse files
committed
demo
1 parent 79ea4b7 commit 7b64bfc

1 file changed

Lines changed: 43 additions & 41 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
#include "ITStrackingGPU/TrackerTraitsGPU.h"
3838
#include "ITStrackingGPU/TrackingKernels.h"
3939

40+
#include "GPUCommonHelpers.h"
41+
4042
#ifndef __HIPCC__
4143
#define THRUST_NAMESPACE thrust::cuda
4244
#else
@@ -56,19 +58,19 @@
5658
#include "DetectorsBase/Propagator.h"
5759
using namespace o2::track;
5860

59-
#define gpuCheckError(x) \
60-
{ \
61-
gpuAssert((x), __FILE__, __LINE__); \
62-
}
63-
inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
64-
{
65-
if (code != cudaSuccess) {
66-
LOGF(error, "GPUassert: %s %s %d", cudaGetErrorString(code), file, line);
67-
if (abort) {
68-
throw std::runtime_error("GPU assert failed.");
69-
}
70-
}
71-
}
61+
// #define gpuCheckError(x) \
62+
// { \
63+
// gpuAssert((x), __FILE__, __LINE__); \
64+
// }
65+
// inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
66+
// {
67+
// if (code != cudaSuccess) {
68+
// LOGF(error, "GPUassert: %s %s %d", cudaGetErrorString(code), file, line);
69+
// if (abort) {
70+
// throw std::runtime_error("GPU assert failed.");
71+
// }
72+
// }
73+
// }
7274

7375
namespace o2::its
7476
{
@@ -878,20 +880,20 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
878880
mulScatAng[iLayer]);
879881
void* d_temp_storage = nullptr;
880882
size_t temp_storage_bytes = 0;
881-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
883+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
882884
temp_storage_bytes, // temp_storage_bytes
883885
trackletsLUTsHost[iLayer], // d_in
884886
trackletsLUTsHost[iLayer], // d_out
885887
nClusters[iLayer] + 1, // num_items
886888
0)); // NOLINT: this is the offset of the sum, not a pointer
887889
discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes));
888-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
890+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
889891
temp_storage_bytes, // temp_storage_bytes
890892
trackletsLUTsHost[iLayer], // d_in
891893
trackletsLUTsHost[iLayer], // d_out
892894
nClusters[iLayer] + 1, // num_items
893895
0)); // NOLINT: this is the offset of the sum, not a pointer
894-
gpuCheckError(cudaFree(d_temp_storage));
896+
GPUChkErrS(cudaFree(d_temp_storage));
895897
}
896898
}
897899

@@ -960,24 +962,24 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
960962
auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets());
961963
nTracklets[iLayer] = unique_end - tracklets_ptr;
962964
if (iLayer > 0) {
963-
gpuCheckError(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int)));
965+
GPUChkErrS(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int)));
964966
gpu::compileTrackletsLookupTableKernel<<<nBlocks, nThreads>>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]);
965967
void* d_temp_storage = nullptr;
966968
size_t temp_storage_bytes = 0;
967-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
969+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
968970
temp_storage_bytes, // temp_storage_bytes
969971
trackletsLUTsHost[iLayer], // d_in
970972
trackletsLUTsHost[iLayer], // d_out
971973
nClusters[iLayer] + 1, // num_items
972974
0)); // NOLINT: this is the offset of the sum, not a pointer
973975
discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes));
974-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
976+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
975977
temp_storage_bytes, // temp_storage_bytes
976978
trackletsLUTsHost[iLayer], // d_in
977979
trackletsLUTsHost[iLayer], // d_out
978980
nClusters[iLayer] + 1, // num_items
979981
0)); // NOLINT: this is the offset of the sum, not a pointer
980-
gpuCheckError(cudaFree(d_temp_storage));
982+
GPUChkErrS(cudaFree(d_temp_storage));
981983
}
982984
}
983985
}
@@ -1016,20 +1018,20 @@ void countCellsHandler(
10161018
nSigmaCut); // const float
10171019
void* d_temp_storage = nullptr;
10181020
size_t temp_storage_bytes = 0;
1019-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
1021+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
10201022
temp_storage_bytes, // temp_storage_bytes
10211023
cellsLUTsHost, // d_in
10221024
cellsLUTsHost, // d_out
10231025
nTracklets + 1, // num_items
10241026
0)); // NOLINT: this is the offset of the sum, not a pointer
10251027
discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes));
1026-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
1028+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
10271029
temp_storage_bytes, // temp_storage_bytes
10281030
cellsLUTsHost, // d_in
10291031
cellsLUTsHost, // d_out
10301032
nTracklets + 1, // num_items
10311033
0)); // NOLINT: this is the offset of the sum, not a pointer
1032-
gpuCheckError(cudaFree(d_temp_storage));
1034+
GPUChkErrS(cudaFree(d_temp_storage));
10331035
}
10341036

10351037
void computeCellsHandler(
@@ -1094,37 +1096,37 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice,
10941096

10951097
void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr;
10961098
size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0;
1097-
gpuCheckError(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage
1099+
GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage
10981100
temp_storage_bytes, // temp_storage_bytes
10991101
neighboursLUT, // d_in
11001102
neighboursLUT, // d_out
11011103
nCellsNext)); // num_items
11021104

11031105
discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes));
1104-
gpuCheckError(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage
1106+
GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage
11051107
temp_storage_bytes, // temp_storage_bytes
11061108
neighboursLUT, // d_in
11071109
neighboursLUT, // d_out
11081110
nCellsNext)); // num_items
11091111

1110-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage
1112+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage
11111113
temp_storage_bytes_2, // temp_storage_bytes
11121114
neighboursIndexTable, // d_in
11131115
neighboursIndexTable, // d_out
11141116
nCells + 1, // num_items
11151117
0)); // NOLINT: this is the offset of the sum, not a pointer
11161118

11171119
discardResult(cudaMalloc(&d_temp_storage_2, temp_storage_bytes_2));
1118-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage
1120+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage
11191121
temp_storage_bytes_2, // temp_storage_bytes
11201122
neighboursIndexTable, // d_in
11211123
neighboursIndexTable, // d_out
11221124
nCells + 1, // num_items
11231125
0)); // NOLINT: this is the offset of the sum, not a pointer
11241126
unsigned int nNeighbours;
1125-
gpuCheckError(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost));
1126-
gpuCheckError(cudaFree(d_temp_storage));
1127-
gpuCheckError(cudaFree(d_temp_storage_2));
1127+
GPUChkErrS(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost));
1128+
GPUChkErrS(cudaFree(d_temp_storage));
1129+
GPUChkErrS(cudaFree(d_temp_storage_2));
11281130
return nNeighbours;
11291131
}
11301132

@@ -1155,8 +1157,8 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice,
11551157
layerIndex,
11561158
nCells,
11571159
maxCellNeighbours);
1158-
gpuCheckError(cudaPeekAtLastError());
1159-
gpuCheckError(cudaDeviceSynchronize());
1160+
GPUChkErrS(cudaPeekAtLastError());
1161+
GPUChkErrS(cudaDeviceSynchronize());
11601162
}
11611163
11621164
int filterCellNeighboursHandler(std::vector<int>& neighHost, // TODO: eventually remove this!
@@ -1182,7 +1184,7 @@ int filterCellNeighboursHandler(std::vector<int>& neighHost, // TODO: eventually
11821184
auto trimmedSize = sortedNeigh.end() - trimmedBegin;
11831185
neighHost.resize(trimmedSize);
11841186
thrust::transform(trimmedBegin, sortedNeigh.end(), validNeighs, gpu::pair_to_first<int, int>());
1185-
gpuCheckError(cudaMemcpy(neighHost.data(), cellNeighbours, trimmedSize * sizeof(int), cudaMemcpyDeviceToHost));
1187+
GPUChkErrS(cudaMemcpy(neighHost.data(), cellNeighbours, trimmedSize * sizeof(int), cudaMemcpyDeviceToHost));
11861188
11871189
return trimmedSize;
11881190
}
@@ -1230,14 +1232,14 @@ void processNeighboursHandler(const int startLayer,
12301232
matCorrType);
12311233
void* d_temp_storage = nullptr;
12321234
size_t temp_storage_bytes = 0;
1233-
gpuCheckError(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage
1235+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage
12341236
temp_storage_bytes, // temp_storage_bytes
12351237
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in
12361238
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out
12371239
nCells[startLayer] + 1, // num_items
12381240
0)); // NOLINT: this is the offset of the sum, not a pointer
12391241
discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes));
1240-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
1242+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
12411243
temp_storage_bytes, // temp_storage_bytes
12421244
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in
12431245
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out
@@ -1265,7 +1267,7 @@ void processNeighboursHandler(const int startLayer,
12651267
propagator,
12661268
matCorrType);
12671269
auto t1 = updatedCellSeed.size();
1268-
gpuCheckError(cudaFree(d_temp_storage));
1270+
GPUChkErrS(cudaFree(d_temp_storage));
12691271
int level = startLevel;
12701272
for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) {
12711273
temp_storage_bytes = 0;
@@ -1294,14 +1296,14 @@ void processNeighboursHandler(const int startLayer,
12941296
maxChi2ClusterAttachment,
12951297
propagator,
12961298
matCorrType);
1297-
gpuCheckError(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage
1299+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage
12981300
temp_storage_bytes, // temp_storage_bytes
12991301
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in
13001302
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out
13011303
nCells[iLayer] + 1, // num_items
13021304
0)); // NOLINT: this is the offset of the sum, not a pointer
13031305
discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes));
1304-
gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
1306+
GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage
13051307
temp_storage_bytes, // temp_storage_bytes
13061308
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in
13071309
thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out
@@ -1330,7 +1332,7 @@ void processNeighboursHandler(const int startLayer,
13301332
maxChi2ClusterAttachment,
13311333
propagator,
13321334
matCorrType);
1333-
gpuCheckError(cudaFree(d_temp_storage));
1335+
GPUChkErrS(cudaFree(d_temp_storage));
13341336
}
13351337
thrust::device_vector<CellSeed> outSeeds(updatedCellSeed.size());
13361338
auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5)));
@@ -1370,8 +1372,8 @@ void trackSeedHandler(CellSeed* trackSeeds,
13701372
thrust::device_ptr<o2::its::TrackITSExt> tr_ptr(tracks);
13711373
13721374
thrust::sort(tr_ptr, tr_ptr + nSeeds, gpu::compare_track_chi2());
1373-
gpuCheckError(cudaPeekAtLastError());
1374-
gpuCheckError(cudaDeviceSynchronize());
1375+
GPUChkErrS(cudaPeekAtLastError());
1376+
GPUChkErrS(cudaDeviceSynchronize());
13751377
}
13761378
13771379
template void countTrackletsInROFsHandler<7>(const IndexTableUtils* utils,

0 commit comments

Comments
 (0)