Skip to content

Commit e9e0417

Browse files
committed
GPU: Keep track of registered memory and unregister
1 parent 4dc2f59 commit e9e0417

9 files changed

Lines changed: 63 additions & 20 deletions

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -905,6 +905,31 @@ void GPUReconstruction::PrintMemoryStatistics()
905905
}
906906
}
907907

908+
int GPUReconstruction::registerMemoryForGPU(const void* ptr, size_t size)
909+
{
910+
if (mProcessingSettings.noGPUMemoryRegistration) {
911+
return 0;
912+
}
913+
int retVal = registerMemoryForGPU_internal(ptr, size);
914+
if (retVal == 0) {
915+
mRegisteredMemoryPtrs.emplace(ptr);
916+
}
917+
return retVal;
918+
}
919+
920+
int GPUReconstruction::unregisterMemoryForGPU(const void* ptr)
921+
{
922+
if (mProcessingSettings.noGPUMemoryRegistration) {
923+
return 0;
924+
}
925+
const auto& pos = mRegisteredMemoryPtrs.find(ptr);
926+
if (pos != mRegisteredMemoryPtrs.end()) {
927+
mRegisteredMemoryPtrs.erase(pos);
928+
return unregisterMemoryForGPU_internal(ptr);
929+
}
930+
return 1;
931+
}
932+
908933
template <class T>
909934
static inline int getStepNum(T step, bool validCheck, int N, const char* err = "Invalid step num")
910935
{

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <iosfwd>
2424
#include <vector>
2525
#include <unordered_map>
26+
#include <unordered_set>
2627

2728
#include "GPUTRDDef.h"
2829
#include "GPUParam.h"
@@ -184,8 +185,8 @@ class GPUReconstruction
184185
virtual int RunChains() = 0;
185186
unsigned int getNEventsProcessed() { return mNEventsProcessed; }
186187
unsigned int getNEventsProcessedInStat() { return mStatNEvents; }
187-
virtual int registerMemoryForGPU(const void* ptr, size_t size) = 0;
188-
virtual int unregisterMemoryForGPU(const void* ptr) = 0;
188+
int registerMemoryForGPU(const void* ptr, size_t size);
189+
int unregisterMemoryForGPU(const void* ptr);
189190
virtual void* getGPUPointer(void* ptr) { return ptr; }
190191
virtual void startGPUProfiling() {}
191192
virtual void endGPUProfiling() {}
@@ -288,6 +289,9 @@ class GPUReconstruction
288289
int EnqueuePipeline(bool terminate = false);
289290
GPUChain* GetNextChainInQueue();
290291

292+
virtual int registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
293+
virtual int unregisterMemoryForGPU_internal(const void* ptr) = 0;
294+
291295
// Management for GPU thread contexts
292296
class GPUThreadContext
293297
{
@@ -364,6 +368,8 @@ class GPUReconstruction
364368
void* mVolatileMemoryStart = nullptr; // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized
365369
size_t mDeviceMemoryUsedMax = 0; //
366370

371+
std::unordered_set<const void*> mRegisteredMemoryPtrs; // List of pointers registered for GPU
372+
367373
GPUReconstruction* mMaster = nullptr; // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc.
368374
std::vector<GPUReconstruction*> mSlaves; // Ptr to slave GPUReconstructions
369375

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -141,8 +141,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
141141
constexpr static const char* GetKernelName();
142142

143143
virtual int GPUDebug(const char* state = "UNKNOWN", int stream = -1, bool force = false);
144-
int registerMemoryForGPU(const void* ptr, size_t size) override { return 0; }
145-
int unregisterMemoryForGPU(const void* ptr) override { return 0; }
146144
int GPUStuck() { return mGPUStuck; }
147145
void ResetDeviceProcessorTypes();
148146
template <class T>
@@ -165,6 +163,9 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
165163

166164
GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) {}
167165

166+
// GPUReconstructionCPU implementation: performs no registration, ignores ptr and size, and always returns 0.
int registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; }
167+
// GPUReconstructionCPU implementation: performs no unregistration, ignores ptr, and always returns 0.
int unregisterMemoryForGPU_internal(const void* ptr) override { return 0; }
168+
168169
virtual void SynchronizeStream(int stream) {}
169170
virtual void SynchronizeEvents(deviceEvent* evList, int nEvents = 1) {}
170171
virtual void StreamWaitForEvents(int stream, deviceEvent* evList, int nEvents = 1) {}

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -300,12 +300,20 @@ int GPUReconstructionDeviceBase::ExitDevice()
300300
return retVal;
301301
}
302302

303-
int GPUReconstructionDeviceBase::registerMemoryForGPU(const void* ptr, size_t size)
303+
int GPUReconstructionDeviceBase::registerMemoryForGPU_internal(const void* ptr, size_t size)
304304
{
305305
return IsGPU();
306306
}
307307

308-
int GPUReconstructionDeviceBase::unregisterMemoryForGPU(const void* ptr)
308+
int GPUReconstructionDeviceBase::unregisterMemoryForGPU_internal(const void* ptr)
309309
{
310310
return IsGPU();
311311
}
312+
313+
void GPUReconstructionDeviceBase::unregisterRemainingRegisteredMemory()
314+
{
315+
for (auto& ptr : mRegisteredMemoryPtrs) {
316+
unregisterMemoryForGPU_internal(ptr);
317+
}
318+
mRegisteredMemoryPtrs.clear();
319+
}

GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,8 +42,9 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU
4242
virtual int InitDevice_Runtime() = 0;
4343
int ExitDevice() override;
4444
virtual int ExitDevice_Runtime() = 0;
45-
int registerMemoryForGPU(const void* ptr, size_t size) override;
46-
int unregisterMemoryForGPU(const void* ptr) override;
45+
int registerMemoryForGPU_internal(const void* ptr, size_t size) override;
46+
int unregisterMemoryForGPU_internal(const void* ptr) override;
47+
void unregisterRemainingRegisteredMemory();
4748

4849
virtual const GPUTPCTracker* CPUTracker(int iSlice) { return &processors()->tpcTrackers[iSlice]; }
4950

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -396,6 +396,7 @@ int GPUReconstructionCUDA::ExitDevice_Runtime()
396396
// Uninitialize CUDA
397397
GPUFailedMsg(cudaSetDevice(mDeviceId));
398398
SynchronizeGPU();
399+
unregisterRemainingRegisteredMemory();
399400

400401
for (unsigned int i = 0; i < mEvents.size(); i++) {
401402
cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data();
@@ -553,14 +554,14 @@ int GPUReconstructionCUDA::PrepareTextures()
553554
return (0);
554555
}
555556

556-
int GPUReconstructionCUDA::registerMemoryForGPU(const void* ptr, size_t size)
557+
int GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, size_t size)
557558
{
558-
return mProcessingSettings.noGPUMemoryRegistration ? 0 : GPUFailedMsgI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault));
559+
return GPUFailedMsgI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault));
559560
}
560561

561-
int GPUReconstructionCUDA::unregisterMemoryForGPU(const void* ptr)
562+
int GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr)
562563
{
563-
return mProcessingSettings.noGPUMemoryRegistration ? 0 : GPUFailedMsgI(cudaHostUnregister((void*)ptr));
564+
return GPUFailedMsgI(cudaHostUnregister((void*)ptr));
564565
}
565566

566567
void GPUReconstructionCUDA::startGPUProfiling()

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,8 +75,8 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
7575
bool IsEventDone(deviceEvent* evList, int nEvents = 1) override;
7676

7777
int PrepareTextures() override;
78-
int registerMemoryForGPU(const void* ptr, size_t size) override;
79-
int unregisterMemoryForGPU(const void* ptr) override;
78+
int registerMemoryForGPU_internal(const void* ptr, size_t size) override;
79+
int unregisterMemoryForGPU_internal(const void* ptr) override;
8080
void startGPUProfiling() override;
8181
void endGPUProfiling() override;
8282

GPU/GPUTracking/Base/hip/GPUReconstructionHIP.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,8 @@ class GPUReconstructionHIPBackend : public GPUReconstructionDeviceBase
5050
void SynchronizeEvents(deviceEvent* evList, int nEvents = 1) override;
5151
void StreamWaitForEvents(int stream, deviceEvent* evList, int nEvents = 1) override;
5252
bool IsEventDone(deviceEvent* evList, int nEvents = 1) override;
53-
int registerMemoryForGPU(const void* ptr, size_t size) override;
54-
int unregisterMemoryForGPU(const void* ptr) override;
53+
int registerMemoryForGPU_internal(const void* ptr, size_t size) override;
54+
int unregisterMemoryForGPU_internal(const void* ptr) override;
5555
void* getGPUPointer(void* ptr) override;
5656

5757
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int stream = -1, deviceEvent ev = nullptr) override;

GPU/GPUTracking/Base/hip/GPUReconstructionHIP.hip.cxx

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -469,6 +469,7 @@ int GPUReconstructionHIPBackend::ExitDevice_Runtime()
469469
// Uninitialize HIP
470470
GPUFailedMsgI(hipSetDevice(mDeviceId));
471471
SynchronizeGPU();
472+
unregisterRemainingRegisteredMemory();
472473

473474
for (unsigned int i = 0; i < mEvents.size(); i++) {
474475
hipEvent_t* events = (hipEvent_t*)mEvents[i].data();
@@ -626,14 +627,14 @@ int GPUReconstructionHIPBackend::GPUDebug(const char* state, int stream, bool fo
626627
return (0);
627628
}
628629

629-
int GPUReconstructionHIPBackend::registerMemoryForGPU(const void* ptr, size_t size)
630+
int GPUReconstructionHIPBackend::registerMemoryForGPU_internal(const void* ptr, size_t size)
630631
{
631-
return mProcessingSettings.noGPUMemoryRegistration ? 0 : GPUFailedMsgI(hipHostRegister((void*)ptr, size, hipHostRegisterDefault));
632+
return GPUFailedMsgI(hipHostRegister((void*)ptr, size, hipHostRegisterDefault));
632633
}
633634

634-
int GPUReconstructionHIPBackend::unregisterMemoryForGPU(const void* ptr)
635+
int GPUReconstructionHIPBackend::unregisterMemoryForGPU_internal(const void* ptr)
635636
{
636-
return mProcessingSettings.noGPUMemoryRegistration ? 0 : GPUFailedMsgI(hipHostUnregister((void*)ptr));
637+
return GPUFailedMsgI(hipHostUnregister((void*)ptr));
637638
}
638639

639640
void* GPUReconstructionHIPBackend::getGPUPointer(void* ptr)

0 commit comments

Comments
 (0)