// Provenance: GPUReconstructionCPU.h (109 lines, 85 loc, 5.56 KB), taken from the GitHub file view of a
// fork of AliceO2Group/AliceO2. Page navigation text and the line-number gutter were not part of the file.
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.
/// \file GPUReconstructionCPU.h
/// \author David Rohr
#ifndef GPURECONSTRUCTIONICPU_H
#define GPURECONSTRUCTIONICPU_H
#include "GPUReconstructionProcessing.h"
#include <stdexcept>
#include <vector>
// Forward declaration only: this header names Ort::SessionOptions solely as a reference parameter
// (see SetONNXGPUStream in GPUReconstructionCPU), so the complete type is not needed here.
namespace Ort
{
struct SessionOptions;
}
namespace o2::gpu
{
/// Reconstruction class deriving from GPUReconstructionProcessing::KernelInterface, instantiated with this
/// class and GPUReconstructionProcessing as template arguments. The stream / event / device hooks declared
/// in the protected section are virtual and have empty or constant-returning inline bodies in this class.
class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface<GPUReconstructionCPU, GPUReconstructionProcessing>
{
  // The constructor below is protected; these friends get access to the non-public members.
  friend GPUReconstruction* GPUReconstruction::GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg);
  friend class GPUChain;

 public:
  ~GPUReconstructionCPU() override;

  // Compile-time constants: a krnlRunRange built from 0 and a krnlEvent built from (nullptr, nullptr, 0).
  // NOTE(review): by name these stand for "no run range" / "no event" - confirm against the krnl* types.
  static constexpr krnlRunRange krnlRunRangeNone{0};
  static constexpr krnlEvent krnlEventNone{nullptr, nullptr, 0};

  // Declared here, defined elsewhere. S / T select the kernel type and I is an integer selector (default 0).
  template <class S, int32_t I = 0>
  krnlProperties getKernelProperties(int gpu = -1);
  template <class T, int32_t I = 0, typename... Args>
  void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args);

  // Virtual debug hook; defaults: state = "UNKNOWN", stream = -1, force = false.
  virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false);

  // Returns the current value of mGPUStuck (the member is not declared in this class body).
  int32_t GPUStuck()
  {
    return mGPUStuck;
  }

  void ResetDeviceProcessorTypes();
  int32_t RunChains() override;
  void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream = -1);

 protected:
  // GPUProcessor subtype holding a GPUConstantMem pointer plus the index of an associated memory resource.
  struct GPUProcessorProcessors : public GPUProcessor {
    GPUConstantMem* mProcessorsProc = nullptr;
    void* SetPointersDeviceProcessor(void* mem);
    int16_t mMemoryResProcessors = -1; // -1 until assigned
  };

  // Forwards the backend settings to the KernelInterface base; does nothing else.
  GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface<GPUReconstructionCPU, GPUReconstructionProcessing>(cfg)
  {
  }

  // Memory (un)registration: this class ignores its arguments and reports 0.
  int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override
  {
    return 0;
  }
  int32_t unregisterMemoryForGPU_internal(const void* ptr) override
  {
    return 0;
  }

  // Synchronization hooks. All are no-ops in this class; IsEventDone always answers true.
  virtual void SynchronizeStream(int32_t stream)
  {
  }
  virtual void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1)
  {
  }
  virtual void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1)
  {
  }
  virtual bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1)
  {
    return true;
  }
  virtual void RecordMarker(deviceEvent* ev, int32_t stream)
  {
  }
  virtual void SynchronizeGPU()
  {
  }
  virtual void ReleaseEvent(deviceEvent ev)
  {
  }

  // Single-resource transfers. Both forward to TransferMemoryInternal:
  //   ToGPU : toGPU = true,  src = res->Ptr(),       dst = res->PtrDevice()
  //   ToHost: toGPU = false, src = res->PtrDevice(), dst = res->Ptr()
  size_t TransferMemoryResourceToGPU(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
  {
    return TransferMemoryInternal(res, stream, ev, evList, nEvents, true, res->Ptr(), res->PtrDevice());
  }
  size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
  {
    return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr());
  }

  // Per-processor transfers: forward to TransferMemoryResourcesHelper with toGPU = true / false.
  size_t TransferMemoryResourcesToGPU(GPUProcessor* proc, int32_t stream = -1, bool all = false)
  {
    return TransferMemoryResourcesHelper(proc, stream, all, true);
  }
  size_t TransferMemoryResourcesToHost(GPUProcessor* proc, int32_t stream = -1, bool all = false)
  {
    return TransferMemoryResourcesHelper(proc, stream, all, false);
  }

  // Same as the single-resource transfers, but the resource is addressed by its index in mMemoryResources.
  // The index is used as-is; no range check is performed here.
  size_t TransferMemoryResourceLinkToGPU(int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
  {
    return TransferMemoryResourceToGPU(&mMemoryResources[res], stream, ev, evList, nEvents);
  }
  size_t TransferMemoryResourceLinkToHost(int16_t res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1)
  {
    return TransferMemoryResourceToHost(&mMemoryResources[res], stream, ev, evList, nEvents);
  }

  // Copy primitives; declared here, defined elsewhere.
  virtual size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1);
  virtual size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1);
  size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
  virtual size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst);

  // ONNX runtime
  // No-op in this class; all three arguments are ignored.
  virtual void SetONNXGPUStream(Ort::SessionOptions&, int32_t, int32_t*)
  {
  }

  int32_t InitDevice() override;
  int32_t ExitDevice() override;
  int32_t GetThread();

  // Always reports 0 in this class.
  virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event)
  {
    return 0;
  }

  // Pointers to tracker classes
  GPUProcessorProcessors mProcShadow;                               // Host copy of tracker objects that will be used on the GPU
  GPUConstantMem*& mProcessorsShadow = mProcShadow.mProcessorsProc; // Reference alias for the pointer stored in mProcShadow

  // Launch-geometry values, all initialised to 1 here.
  uint32_t mBlockCount = 1;
  uint32_t mThreadCount = 1;
  uint32_t mWarpSize = 1;

 private:
  // Shared implementation behind TransferMemoryResourcesToGPU / TransferMemoryResourcesToHost.
  size_t TransferMemoryResourcesHelper(GPUProcessor* proc, int32_t stream, bool all, bool toGPU);

  template <class S, int32_t I = 0, typename... Args>
  void runKernelInterface(krnlSetup&& setup, const Args&... args);
};
} // namespace o2::gpu
#endif