forked from AliceO2Group/AliceO2
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathGPUReconstructionCPUKernels.h
More file actions
98 lines (90 loc) · 3.69 KB
/
GPUReconstructionCPUKernels.h
File metadata and controls
98 lines (90 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
/// \file GPUReconstructionCPUKernels.h
/// \author David Rohr
#ifndef GPURECONSTRUCTIONICPUKERNELS_H
#define GPURECONSTRUCTIONICPUKERNELS_H
#include "GPUReconstructionCPU.h"
#include "GPUSettings.h"
#include "GPULogging.h"
namespace o2::gpu
{
/// \brief Resolve the launch configuration for kernel <S, I>, optionally time it, and dispatch it.
///
/// Steps, in order:
///  - The reco step is taken from S::GetRecoStep(); if that yields NoRecoStep, setup.x.step is used instead.
///  - A fallback mode is derived: 0 when IsGPU() is false or the step is fully contained in
///    mRecoSteps.stepsGPUMask, 2 when setup.x.device requests the CPU, 1 otherwise.
///  - setup.x.nBlocks / setup.x.nThreads are rewritten in place according to the sentinel values
///    documented at the switch statement below.
///  - With debugLevel >= 1 a per-kernel timer is used around the call, and CheckErrorCodes() is
///    evaluated after the call.
///
/// \tparam S    Kernel class; must provide a static GetRecoStep().
/// \tparam I    Kernel index within S.
/// \tparam Args Types of the extra kernel arguments.
/// \param setup Launch setup; its x.nThreads and x.nBlocks members are modified.
/// \param args  Extra arguments, packed and handed to runKernelVirtual().
/// \throws std::runtime_error if no reco step is defined, if the thread count exceeds
///         GPUCA_MAX_THREADS, if GPUDebug() reports a failure, or (debugLevel >= 1 only) if
///         CheckErrorCodes() reports an error and ignoreNonFatalGPUErrors is not set.
template <class S, int32_t I, typename... Args>
inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args const&... args)
{
  // The kernel class may fix its reco step; otherwise the caller supplies it through the setup.
  GPUDataTypes::RecoStep recoStep = S::GetRecoStep();
  if (recoStep == GPUDataTypes::RecoStep::NoRecoStep) {
    recoStep = setup.x.step;
  }
  if (recoStep == GPUDataTypes::RecoStep::NoRecoStep) {
    throw std::runtime_error("Failure running general kernel without defining RecoStep");
  }

  // 0: no fallback, 1: step not covered by stepsGPUMask, 2: CPU explicitly requested in the setup.
  int32_t fallbackMode = 0;
  if (IsGPU()) {
    if (setup.x.device == krnlDeviceType::CPU) {
      fallbackMode = 2;
    } else {
      fallbackMode = (mRecoSteps.stepsGPUMask & recoStep) != recoStep;
    }
  }

  uint32_t& threadCount = setup.x.nThreads;
  uint32_t& blockCount = setup.x.nBlocks;
  const uint32_t streamId = setup.x.stream;
  auto props = getKernelProperties<S, I>();
  const int32_t defaultThreads = fallbackMode ? 1 : props.nThreads;
  const int32_t defaultBlocks = fallbackMode ? 1 : (props.forceBlocks ? props.forceBlocks : (props.minBlocks * mMultiprocessorCount));

  // Sentinel values of nBlocks select how the grid is derived.
  switch (blockCount) {
    case static_cast<uint32_t>(-1):
      // nBlocks becomes nThreads divided by the default thread count, rounded up.
      blockCount = (threadCount + defaultThreads - 1) / defaultThreads;
      threadCount = defaultThreads;
      break;
    case static_cast<uint32_t>(-2):
      // The incoming nThreads value is used as the block count.
      blockCount = threadCount;
      threadCount = defaultThreads;
      break;
    case static_cast<uint32_t>(-3):
      // Both dimensions come from the kernel properties (or 1 in fallback mode).
      blockCount = defaultBlocks;
      threadCount = defaultThreads;
      break;
    default:
      // Explicit block count: a negative thread count is negated, or replaced by 1 in fallback mode.
      if (static_cast<int32_t>(threadCount) < 0) {
        threadCount = fallbackMode ? 1 : -threadCount;
      }
      break;
  }

  if (threadCount > GPUCA_MAX_THREADS) {
    throw std::runtime_error("GPUCA_MAX_THREADS exceeded");
  }

  if (GetProcessingSettings().debugLevel >= 3) {
    const char* const target = fallbackMode == 2 ? "CPU (forced)" : (fallbackMode ? "CPU (fallback)" : mDeviceName.c_str());
    GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName<S, I>(), streamId, setup.y.index, blockCount, threadCount, target);
  }

  // An empty grid means there is nothing to launch.
  if (threadCount == 0 || blockCount == 0) {
    return;
  }

  HighResTimer* timer = nullptr;
  if (GetProcessingSettings().debugLevel >= 1) {
    const bool runsOnHost = !IsGPU() || fallbackMode;
    // The timer slot is indexed by host thread when running on the host, by stream otherwise.
    timer = &getKernelTimer<S, I>(recoStep, runsOnHost ? getHostThreadIndex() : streamId);
    const bool useHostTimer = !GetProcessingSettings().deviceTimers || runsOnHost;
    if (useHostTimer && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) {
      timer->Start();
    }
  }

  // NOTE(review): deviceTime is presumably bound by reference inside the argument pack so that the
  // backend can report a device-side time through it - confirm against krnlSetupArgs.
  double deviceTime = 0.;
  krnlSetupArgs<S, I, Args...> packedArgs{{}, {{setup.x, setup.y, setup.z}, deviceTime}, {args...}};
  const uint32_t kernelId = GetKernelNum<S, I>();
  if (!fallbackMode) {
    runKernelVirtual(kernelId, &packedArgs);
  } else {
    // Qualified call: always uses the GPUReconstructionCPU implementation.
    GPUReconstructionCPU::runKernelVirtual(kernelId, &packedArgs);
  }

  if (GPUDebug(GetKernelName<S, I>(), streamId, GetProcessingSettings().serializeGPU & 1)) {
    throw std::runtime_error("kernel failure");
  }

  if (GetProcessingSettings().debugLevel >= 1) {
    if (timer != nullptr) {
      // A non-zero reported time replaces the host-side measurement, which is then discarded.
      const bool haveDeviceTime = deviceTime != 0.;
      if (haveDeviceTime) {
        timer->AddTime(deviceTime);
      }
      if (timer->IsRunning()) {
        if (haveDeviceTime) {
          timer->Abort();
        } else {
          timer->Stop();
        }
      }
    }
    if (CheckErrorCodes(fallbackMode) && !GetProcessingSettings().ignoreNonFatalGPUErrors) {
      throw std::runtime_error("kernel error code");
    }
  }
}
} // namespace o2::gpu
#endif